1 /* Induction variable optimizations.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 2) Candidates for the induction variables are found. This includes
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
39 3) The optimal (w.r. to a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variables value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
56 4) The trees are transformed to use the new variables, the dead code is
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, it might give a better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
66 #include "coretypes.h"
74 #include "fold-const.h"
75 #include "stor-layout.h"
77 #include "gimple-pretty-print.h"
78 #include "internal-fn.h"
81 #include "gimple-iterator.h"
82 #include "gimplify-me.h"
85 #include "tree-ssa-loop-ivopts.h"
86 #include "tree-ssa-loop-manip.h"
87 #include "tree-ssa-loop-niter.h"
88 #include "tree-ssa-loop.h"
90 #include "insn-config.h"
100 #include "tree-ssa.h"
102 #include "tree-pass.h"
103 #include "tree-chrec.h"
104 #include "tree-scalar-evolution.h"
106 #include "langhooks.h"
107 #include "tree-affine.h"
109 #include "tree-inline.h"
110 #include "tree-ssa-propagate.h"
111 #include "tree-ssa-address.h"
112 #include "builtins.h"
113 #include "tree-vectorizer.h"
115 /* FIXME: Expressions are expanded to RTL in this pass to determine the
116 cost of different addressing modes. This should be moved to a TBD
117 interface between the GIMPLE and RTL worlds. */
120 /* The infinite cost. */
121 #define INFTY 10000000
123 #define AVG_LOOP_NITER(LOOP) 5
125 /* Returns the expected number of loop iterations for LOOP.
126 The average trip count is computed from profile data if it
129 static inline HOST_WIDE_INT
130 avg_loop_niter (struct loop
*loop
)
132 HOST_WIDE_INT niter
= estimated_stmt_executions_int (loop
);
134 return AVG_LOOP_NITER (loop
);
139 /* Representation of the induction variable. */
142 tree base
; /* Initial value of the iv. */
143 tree base_object
; /* A memory object to that the induction variable points. */
144 tree step
; /* Step of the iv (constant only). */
145 tree ssa_name
; /* The ssa name with the value. */
146 unsigned use_id
; /* The identifier in the use if it is the case. */
147 bool biv_p
; /* Is it a biv? */
148 bool have_use_for
; /* Do we already have a use for it? */
149 bool no_overflow
; /* True if the iv doesn't overflow. */
150 bool have_address_use
;/* For biv, indicate if it's used in any address
154 /* Per-ssa version information (induction variable descriptions, etc.). */
157 tree name
; /* The ssa name. */
158 struct iv
*iv
; /* Induction variable description. */
159 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
160 an expression that is not an induction variable. */
161 bool preserve_biv
; /* For the original biv, whether to preserve it. */
162 unsigned inv_id
; /* Id of an invariant. */
168 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
169 USE_ADDRESS
, /* Use in an address. */
170 USE_COMPARE
/* Use is a compare. */
173 /* Cost of a computation. */
176 int cost
; /* The runtime cost. */
177 unsigned complexity
; /* The estimate of the complexity of the code for
178 the computation (in no concrete units --
179 complexity field should be larger for more
180 complex expressions and addressing modes). */
183 static const comp_cost no_cost
= {0, 0};
184 static const comp_cost infinite_cost
= {INFTY
, INFTY
};
186 /* The candidate - cost pair. */
189 struct iv_cand
*cand
; /* The candidate. */
190 comp_cost cost
; /* The cost. */
191 bitmap depends_on
; /* The list of invariants that have to be
193 tree value
; /* For final value elimination, the expression for
194 the final value of the iv. For iv elimination,
195 the new bound to compare with. */
196 enum tree_code comp
; /* For iv elimination, the comparison. */
197 int inv_expr_id
; /* Loop invariant expression id. */
203 unsigned id
; /* The id of the use. */
204 unsigned sub_id
; /* The id of the sub use. */
205 enum use_type type
; /* Type of the use. */
206 struct iv
*iv
; /* The induction variable it is based on. */
207 gimple
*stmt
; /* Statement in that it occurs. */
208 tree
*op_p
; /* The place where it occurs. */
209 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
212 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
213 struct cost_pair
*cost_map
;
214 /* The costs wrto the iv candidates. */
216 struct iv_cand
*selected
;
217 /* The selected candidate. */
219 struct iv_use
*next
; /* The next sub use. */
220 tree addr_base
; /* Base address with const offset stripped. */
221 unsigned HOST_WIDE_INT addr_offset
;
222 /* Const offset stripped from base address. */
225 /* The position where the iv is computed. */
228 IP_NORMAL
, /* At the end, just before the exit condition. */
229 IP_END
, /* At the end of the latch block. */
230 IP_BEFORE_USE
, /* Immediately before a specific use. */
231 IP_AFTER_USE
, /* Immediately after a specific use. */
232 IP_ORIGINAL
/* The original biv. */
235 /* The induction variable candidate. */
238 unsigned id
; /* The number of the candidate. */
239 bool important
; /* Whether this is an "important" candidate, i.e. such
240 that it should be considered by all uses. */
241 ENUM_BITFIELD(iv_position
) pos
: 8; /* Where it is computed. */
242 gimple
*incremented_at
;/* For original biv, the statement where it is
244 tree var_before
; /* The variable used for it before increment. */
245 tree var_after
; /* The variable used for it after increment. */
246 struct iv
*iv
; /* The value of the candidate. NULL for
247 "pseudocandidate" used to indicate the possibility
248 to replace the final value of an iv by direct
249 computation of the value. */
250 unsigned cost
; /* Cost of the candidate. */
251 unsigned cost_step
; /* Cost of the candidate's increment operation. */
252 struct iv_use
*ainc_use
; /* For IP_{BEFORE,AFTER}_USE candidates, the place
253 where it is incremented. */
254 bitmap depends_on
; /* The list of invariants that are used in step of the
256 struct iv
*orig_iv
; /* The original iv if this cand is added from biv with
260 /* Loop invariant expression hashtable entry. */
261 struct iv_inv_expr_ent
268 /* The data used by the induction variable optimizations. */
270 typedef struct iv_use
*iv_use_p
;
272 typedef struct iv_cand
*iv_cand_p
;
274 /* Hashtable helpers. */
276 struct iv_inv_expr_hasher
: free_ptr_hash
<iv_inv_expr_ent
>
278 static inline hashval_t
hash (const iv_inv_expr_ent
*);
279 static inline bool equal (const iv_inv_expr_ent
*, const iv_inv_expr_ent
*);
282 /* Hash function for loop invariant expressions. */
285 iv_inv_expr_hasher::hash (const iv_inv_expr_ent
*expr
)
290 /* Hash table equality function for expressions. */
293 iv_inv_expr_hasher::equal (const iv_inv_expr_ent
*expr1
,
294 const iv_inv_expr_ent
*expr2
)
296 return expr1
->hash
== expr2
->hash
297 && operand_equal_p (expr1
->expr
, expr2
->expr
, 0);
302 /* The currently optimized loop. */
303 struct loop
*current_loop
;
304 source_location loop_loc
;
306 /* Numbers of iterations for all exits of the current loop. */
307 hash_map
<edge
, tree_niter_desc
*> *niters
;
309 /* Number of registers used in it. */
312 /* The size of version_info array allocated. */
313 unsigned version_info_size
;
315 /* The array of information for the ssa names. */
316 struct version_info
*version_info
;
318 /* The hashtable of loop invariant expressions created
320 hash_table
<iv_inv_expr_hasher
> *inv_expr_tab
;
322 /* Loop invariant expression id. */
325 /* The bitmap of indices in version_info whose value was changed. */
328 /* The uses of induction variables. */
329 vec
<iv_use_p
> iv_uses
;
331 /* The candidates. */
332 vec
<iv_cand_p
> iv_candidates
;
334 /* A bitmap of important candidates. */
335 bitmap important_candidates
;
337 /* Cache used by tree_to_aff_combination_expand. */
338 hash_map
<tree
, name_expansion
*> *name_expansion_cache
;
340 /* The maximum invariant id. */
343 /* Number of no_overflow BIVs which are not used in memory address. */
344 unsigned bivs_not_used_in_addr
;
346 /* Obstack for iv structure. */
347 struct obstack iv_obstack
;
349 /* Whether to consider just related and important candidates when replacing a
351 bool consider_all_candidates
;
353 /* Are we optimizing for speed? */
356 /* Whether the loop body includes any function calls. */
357 bool body_includes_call
;
359 /* Whether the loop body can only be exited via single exit. */
360 bool loop_single_exit_p
;
363 /* An assignment of iv candidates to uses. */
367 /* The number of uses covered by the assignment. */
370 /* Number of uses that cannot be expressed by the candidates in the set. */
373 /* Candidate assigned to a use, together with the related costs. */
374 struct cost_pair
**cand_for_use
;
376 /* Number of times each candidate is used. */
377 unsigned *n_cand_uses
;
379 /* The candidates used. */
382 /* The number of candidates in the set. */
385 /* Total number of registers needed. */
388 /* Total cost of expressing uses. */
389 comp_cost cand_use_cost
;
391 /* Total cost of candidates. */
394 /* Number of times each invariant is used. */
395 unsigned *n_invariant_uses
;
397 /* The array holding the number of uses of each loop
398 invariant expressions created by ivopt. */
399 unsigned *used_inv_expr
;
401 /* The number of created loop invariants. */
402 unsigned num_used_inv_expr
;
404 /* Total cost of the assignment. */
408 /* Difference of two iv candidate assignments. */
415 /* An old assignment (for rollback purposes). */
416 struct cost_pair
*old_cp
;
418 /* A new assignment. */
419 struct cost_pair
*new_cp
;
421 /* Next change in the list. */
422 struct iv_ca_delta
*next_change
;
425 /* Bound on number of candidates below that all candidates are considered. */
427 #define CONSIDER_ALL_CANDIDATES_BOUND \
428 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
430 /* If there are more iv occurrences, we just give up (it is quite unlikely that
431 optimizing such a loop would help, and it would take ages). */
433 #define MAX_CONSIDERED_USES \
434 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
436 /* If there are at most this number of ivs in the set, try removing unnecessary
437 ivs from the set always. */
439 #define ALWAYS_PRUNE_CAND_SET_BOUND \
440 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
442 /* The list of trees for that the decl_rtl field must be reset is stored
445 static vec
<tree
> decl_rtl_to_reset
;
447 static comp_cost
force_expr_to_var_cost (tree
, bool);
449 /* Number of uses recorded in DATA. */
451 static inline unsigned
452 n_iv_uses (struct ivopts_data
*data
)
454 return data
->iv_uses
.length ();
457 /* Ith use recorded in DATA. */
459 static inline struct iv_use
*
460 iv_use (struct ivopts_data
*data
, unsigned i
)
462 return data
->iv_uses
[i
];
465 /* Number of candidates recorded in DATA. */
467 static inline unsigned
468 n_iv_cands (struct ivopts_data
*data
)
470 return data
->iv_candidates
.length ();
473 /* Ith candidate recorded in DATA. */
475 static inline struct iv_cand
*
476 iv_cand (struct ivopts_data
*data
, unsigned i
)
478 return data
->iv_candidates
[i
];
481 /* The single loop exit if it dominates the latch, NULL otherwise. */
484 single_dom_exit (struct loop
*loop
)
486 edge exit
= single_exit (loop
);
491 if (!just_once_each_iteration_p (loop
, exit
->src
))
497 /* Dumps information about the induction variable IV to FILE. */
500 dump_iv (FILE *file
, struct iv
*iv
, bool dump_name
)
502 if (iv
->ssa_name
&& dump_name
)
504 fprintf (file
, "ssa name ");
505 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
506 fprintf (file
, "\n");
509 fprintf (file
, " type ");
510 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
511 fprintf (file
, "\n");
515 fprintf (file
, " base ");
516 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
517 fprintf (file
, "\n");
519 fprintf (file
, " step ");
520 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
521 fprintf (file
, "\n");
525 fprintf (file
, " invariant ");
526 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
527 fprintf (file
, "\n");
532 fprintf (file
, " base object ");
533 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
534 fprintf (file
, "\n");
538 fprintf (file
, " is a biv\n");
541 fprintf (file
, " iv doesn't overflow wrto loop niter\n");
544 /* Dumps information about the USE to FILE. */
547 dump_use (FILE *file
, struct iv_use
*use
)
549 fprintf (file
, "use %d", use
->id
);
551 fprintf (file
, ".%d", use
->sub_id
);
553 fprintf (file
, "\n");
557 case USE_NONLINEAR_EXPR
:
558 fprintf (file
, " generic\n");
562 fprintf (file
, " address\n");
566 fprintf (file
, " compare\n");
573 fprintf (file
, " in statement ");
574 print_gimple_stmt (file
, use
->stmt
, 0, 0);
575 fprintf (file
, "\n");
577 fprintf (file
, " at position ");
579 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
580 fprintf (file
, "\n");
582 dump_iv (file
, use
->iv
, false);
584 if (use
->related_cands
)
586 fprintf (file
, " related candidates ");
587 dump_bitmap (file
, use
->related_cands
);
591 /* Dumps information about the uses to FILE. */
594 dump_uses (FILE *file
, struct ivopts_data
*data
)
599 for (i
= 0; i
< n_iv_uses (data
); i
++)
601 use
= iv_use (data
, i
);
604 dump_use (file
, use
);
608 fprintf (file
, "\n");
612 /* Dumps information about induction variable candidate CAND to FILE. */
615 dump_cand (FILE *file
, struct iv_cand
*cand
)
617 struct iv
*iv
= cand
->iv
;
619 fprintf (file
, "candidate %d%s\n",
620 cand
->id
, cand
->important
? " (important)" : "");
622 if (cand
->depends_on
)
624 fprintf (file
, " depends on ");
625 dump_bitmap (file
, cand
->depends_on
);
630 fprintf (file
, " final value replacement\n");
634 if (cand
->var_before
)
636 fprintf (file
, " var_before ");
637 print_generic_expr (file
, cand
->var_before
, TDF_SLIM
);
638 fprintf (file
, "\n");
642 fprintf (file
, " var_after ");
643 print_generic_expr (file
, cand
->var_after
, TDF_SLIM
);
644 fprintf (file
, "\n");
650 fprintf (file
, " incremented before exit test\n");
654 fprintf (file
, " incremented before use %d\n", cand
->ainc_use
->id
);
658 fprintf (file
, " incremented after use %d\n", cand
->ainc_use
->id
);
662 fprintf (file
, " incremented at end\n");
666 fprintf (file
, " original biv\n");
670 dump_iv (file
, iv
, false);
673 /* Returns the info for ssa version VER. */
675 static inline struct version_info
*
676 ver_info (struct ivopts_data
*data
, unsigned ver
)
678 return data
->version_info
+ ver
;
681 /* Returns the info for ssa name NAME. */
683 static inline struct version_info
*
684 name_info (struct ivopts_data
*data
, tree name
)
686 return ver_info (data
, SSA_NAME_VERSION (name
));
689 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
693 stmt_after_ip_normal_pos (struct loop
*loop
, gimple
*stmt
)
695 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
699 if (sbb
== loop
->latch
)
705 return stmt
== last_stmt (bb
);
708 /* Returns true if STMT if after the place where the original induction
709 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
710 if the positions are identical. */
713 stmt_after_inc_pos (struct iv_cand
*cand
, gimple
*stmt
, bool true_if_equal
)
715 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
716 basic_block stmt_bb
= gimple_bb (stmt
);
718 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
721 if (stmt_bb
!= cand_bb
)
725 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
727 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
730 /* Returns true if STMT if after the place where the induction variable
731 CAND is incremented in LOOP. */
734 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple
*stmt
)
742 return stmt_after_ip_normal_pos (loop
, stmt
);
746 return stmt_after_inc_pos (cand
, stmt
, false);
749 return stmt_after_inc_pos (cand
, stmt
, true);
756 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
759 abnormal_ssa_name_p (tree exp
)
764 if (TREE_CODE (exp
) != SSA_NAME
)
767 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
770 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
771 abnormal phi node. Callback for for_each_index. */
774 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
775 void *data ATTRIBUTE_UNUSED
)
777 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
779 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
781 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
785 return !abnormal_ssa_name_p (*index
);
788 /* Returns true if EXPR contains a ssa name that occurs in an
789 abnormal phi node. */
792 contains_abnormal_ssa_name_p (tree expr
)
795 enum tree_code_class codeclass
;
800 code
= TREE_CODE (expr
);
801 codeclass
= TREE_CODE_CLASS (code
);
803 if (code
== SSA_NAME
)
804 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
806 if (code
== INTEGER_CST
807 || is_gimple_min_invariant (expr
))
810 if (code
== ADDR_EXPR
)
811 return !for_each_index (&TREE_OPERAND (expr
, 0),
812 idx_contains_abnormal_ssa_name_p
,
815 if (code
== COND_EXPR
)
816 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0))
817 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1))
818 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 2));
824 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
829 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
841 /* Returns the structure describing number of iterations determined from
842 EXIT of DATA->current_loop, or NULL if something goes wrong. */
844 static struct tree_niter_desc
*
845 niter_for_exit (struct ivopts_data
*data
, edge exit
)
847 struct tree_niter_desc
*desc
;
848 tree_niter_desc
**slot
;
852 data
->niters
= new hash_map
<edge
, tree_niter_desc
*>;
856 slot
= data
->niters
->get (exit
);
860 /* Try to determine number of iterations. We cannot safely work with ssa
861 names that appear in phi nodes on abnormal edges, so that we do not
862 create overlapping life ranges for them (PR 27283). */
863 desc
= XNEW (struct tree_niter_desc
);
864 if (!number_of_iterations_exit (data
->current_loop
,
866 || contains_abnormal_ssa_name_p (desc
->niter
))
871 data
->niters
->put (exit
, desc
);
879 /* Returns the structure describing number of iterations determined from
880 single dominating exit of DATA->current_loop, or NULL if something
883 static struct tree_niter_desc
*
884 niter_for_single_dom_exit (struct ivopts_data
*data
)
886 edge exit
= single_dom_exit (data
->current_loop
);
891 return niter_for_exit (data
, exit
);
894 /* Initializes data structures used by the iv optimization pass, stored
898 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
900 data
->version_info_size
= 2 * num_ssa_names
;
901 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
902 data
->relevant
= BITMAP_ALLOC (NULL
);
903 data
->important_candidates
= BITMAP_ALLOC (NULL
);
904 data
->max_inv_id
= 0;
906 data
->iv_uses
.create (20);
907 data
->iv_candidates
.create (20);
908 data
->inv_expr_tab
= new hash_table
<iv_inv_expr_hasher
> (10);
909 data
->inv_expr_id
= 0;
910 data
->name_expansion_cache
= NULL
;
911 decl_rtl_to_reset
.create (20);
912 gcc_obstack_init (&data
->iv_obstack
);
915 /* Returns a memory object to that EXPR points. In case we are able to
916 determine that it does not point to any such object, NULL is returned. */
919 determine_base_object (tree expr
)
921 enum tree_code code
= TREE_CODE (expr
);
924 /* If this is a pointer casted to any type, we need to determine
925 the base object for the pointer; so handle conversions before
926 throwing away non-pointer expressions. */
927 if (CONVERT_EXPR_P (expr
))
928 return determine_base_object (TREE_OPERAND (expr
, 0));
930 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
939 obj
= TREE_OPERAND (expr
, 0);
940 base
= get_base_address (obj
);
945 if (TREE_CODE (base
) == MEM_REF
)
946 return determine_base_object (TREE_OPERAND (base
, 0));
948 return fold_convert (ptr_type_node
,
949 build_fold_addr_expr (base
));
951 case POINTER_PLUS_EXPR
:
952 return determine_base_object (TREE_OPERAND (expr
, 0));
956 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
960 return fold_convert (ptr_type_node
, expr
);
964 /* Return true if address expression with non-DECL_P operand appears
968 contain_complex_addr_expr (tree expr
)
973 switch (TREE_CODE (expr
))
975 case POINTER_PLUS_EXPR
:
978 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 0));
979 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 1));
983 return (!DECL_P (TREE_OPERAND (expr
, 0)));
992 /* Allocates an induction variable with given initial value BASE and step STEP
993 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
996 alloc_iv (struct ivopts_data
*data
, tree base
, tree step
,
997 bool no_overflow
= false)
1000 struct iv
*iv
= (struct iv
*) obstack_alloc (&data
->iv_obstack
,
1001 sizeof (struct iv
));
1002 gcc_assert (step
!= NULL_TREE
);
1004 /* Lower address expression in base except ones with DECL_P as operand.
1006 1) More accurate cost can be computed for address expressions;
1007 2) Duplicate candidates won't be created for bases in different
1008 forms, like &a[0] and &a. */
1010 if ((TREE_CODE (expr
) == ADDR_EXPR
&& !DECL_P (TREE_OPERAND (expr
, 0)))
1011 || contain_complex_addr_expr (expr
))
1014 tree_to_aff_combination (expr
, TREE_TYPE (base
), &comb
);
1015 base
= fold_convert (TREE_TYPE (base
), aff_combination_to_tree (&comb
));
1019 iv
->base_object
= determine_base_object (base
);
1022 iv
->have_use_for
= false;
1024 iv
->ssa_name
= NULL_TREE
;
1025 iv
->no_overflow
= no_overflow
;
1026 iv
->have_address_use
= false;
1031 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1032 doesn't overflow. */
1035 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
,
1038 struct version_info
*info
= name_info (data
, iv
);
1040 gcc_assert (!info
->iv
);
1042 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
1043 info
->iv
= alloc_iv (data
, base
, step
, no_overflow
);
1044 info
->iv
->ssa_name
= iv
;
1047 /* Finds induction variable declaration for VAR. */
1050 get_iv (struct ivopts_data
*data
, tree var
)
1053 tree type
= TREE_TYPE (var
);
1055 if (!POINTER_TYPE_P (type
)
1056 && !INTEGRAL_TYPE_P (type
))
1059 if (!name_info (data
, var
)->iv
)
1061 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1064 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
1065 set_iv (data
, var
, var
, build_int_cst (type
, 0), true);
1068 return name_info (data
, var
)->iv
;
1071 /* Return the first non-invariant ssa var found in EXPR. */
1074 extract_single_var_from_expr (tree expr
)
1078 enum tree_code code
;
1080 if (!expr
|| is_gimple_min_invariant (expr
))
1083 code
= TREE_CODE (expr
);
1084 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code
)))
1086 n
= TREE_OPERAND_LENGTH (expr
);
1087 for (i
= 0; i
< n
; i
++)
1089 tmp
= extract_single_var_from_expr (TREE_OPERAND (expr
, i
));
1095 return (TREE_CODE (expr
) == SSA_NAME
) ? expr
: NULL
;
1098 /* Finds basic ivs. */
1101 find_bivs (struct ivopts_data
*data
)
1105 tree step
, type
, base
, stop
;
1107 struct loop
*loop
= data
->current_loop
;
1110 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1114 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
1117 if (virtual_operand_p (PHI_RESULT (phi
)))
1120 if (!simple_iv (loop
, loop
, PHI_RESULT (phi
), &iv
, true))
1123 if (integer_zerop (iv
.step
))
1127 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1128 /* Stop expanding iv base at the first ssa var referred by iv step.
1129 Ideally we should stop at any ssa var, because that's expensive
1130 and unusual to happen, we just do it on the first one.
1132 See PR64705 for the rationale. */
1133 stop
= extract_single_var_from_expr (step
);
1134 base
= expand_simple_operations (base
, stop
);
1135 if (contains_abnormal_ssa_name_p (base
)
1136 || contains_abnormal_ssa_name_p (step
))
1139 type
= TREE_TYPE (PHI_RESULT (phi
));
1140 base
= fold_convert (type
, base
);
1143 if (POINTER_TYPE_P (type
))
1144 step
= convert_to_ptrofftype (step
);
1146 step
= fold_convert (type
, step
);
1149 set_iv (data
, PHI_RESULT (phi
), base
, step
, iv
.no_overflow
);
1156 /* Marks basic ivs. */
1159 mark_bivs (struct ivopts_data
*data
)
1164 struct iv
*iv
, *incr_iv
;
1165 struct loop
*loop
= data
->current_loop
;
1166 basic_block incr_bb
;
1169 data
->bivs_not_used_in_addr
= 0;
1170 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1174 iv
= get_iv (data
, PHI_RESULT (phi
));
1178 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1179 def
= SSA_NAME_DEF_STMT (var
);
1180 /* Don't mark iv peeled from other one as biv. */
1182 && gimple_code (def
) == GIMPLE_PHI
1183 && gimple_bb (def
) == loop
->header
)
1186 incr_iv
= get_iv (data
, var
);
1190 /* If the increment is in the subloop, ignore it. */
1191 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1192 if (incr_bb
->loop_father
!= data
->current_loop
1193 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1197 incr_iv
->biv_p
= true;
1198 if (iv
->no_overflow
)
1199 data
->bivs_not_used_in_addr
++;
1200 if (incr_iv
->no_overflow
)
1201 data
->bivs_not_used_in_addr
++;
1205 /* Checks whether STMT defines a linear induction variable and stores its
1206 parameters to IV. */
1209 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple
*stmt
, affine_iv
*iv
)
1212 struct loop
*loop
= data
->current_loop
;
1214 iv
->base
= NULL_TREE
;
1215 iv
->step
= NULL_TREE
;
1217 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1220 lhs
= gimple_assign_lhs (stmt
);
1221 if (TREE_CODE (lhs
) != SSA_NAME
)
1224 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1227 /* Stop expanding iv base at the first ssa var referred by iv step.
1228 Ideally we should stop at any ssa var, because that's expensive
1229 and unusual to happen, we just do it on the first one.
1231 See PR64705 for the rationale. */
1232 stop
= extract_single_var_from_expr (iv
->step
);
1233 iv
->base
= expand_simple_operations (iv
->base
, stop
);
1234 if (contains_abnormal_ssa_name_p (iv
->base
)
1235 || contains_abnormal_ssa_name_p (iv
->step
))
1238 /* If STMT could throw, then do not consider STMT as defining a GIV.
1239 While this will suppress optimizations, we can not safely delete this
1240 GIV and associated statements, even if it appears it is not used. */
1241 if (stmt_could_throw_p (stmt
))
1247 /* Finds general ivs in statement STMT. */
1250 find_givs_in_stmt (struct ivopts_data
*data
, gimple
*stmt
)
1254 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1257 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
, iv
.no_overflow
);
1260 /* Finds general ivs in basic block BB. */
1263 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1265 gimple_stmt_iterator bsi
;
1267 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1268 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1271 /* Finds general ivs. */
1274 find_givs (struct ivopts_data
*data
)
1276 struct loop
*loop
= data
->current_loop
;
1277 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1280 for (i
= 0; i
< loop
->num_nodes
; i
++)
1281 find_givs_in_bb (data
, body
[i
]);
1285 /* For each ssa name defined in LOOP determines whether it is an induction
1286 variable and if so, its initial value and step. */
1289 find_induction_variables (struct ivopts_data
*data
)
1294 if (!find_bivs (data
))
1300 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1302 struct tree_niter_desc
*niter
= niter_for_single_dom_exit (data
);
1306 fprintf (dump_file
, " number of iterations ");
1307 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1308 if (!integer_zerop (niter
->may_be_zero
))
1310 fprintf (dump_file
, "; zero if ");
1311 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1313 fprintf (dump_file
, "\n\n");
1316 fprintf (dump_file
, "Induction variables:\n\n");
1318 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1320 if (ver_info (data
, i
)->iv
)
1321 dump_iv (dump_file
, ver_info (data
, i
)->iv
, true);
1328 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1329 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1330 is the const offset stripped from IV base. For uses of other types,
1331 ADDR_BASE and ADDR_OFFSET are zero by default. */
1333 static struct iv_use
*
1334 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1335 gimple
*stmt
, enum use_type use_type
, tree addr_base
= NULL
,
1336 unsigned HOST_WIDE_INT addr_offset
= 0)
1338 struct iv_use
*use
= XCNEW (struct iv_use
);
1340 use
->id
= n_iv_uses (data
);
1342 use
->type
= use_type
;
1346 use
->related_cands
= BITMAP_ALLOC (NULL
);
1348 use
->addr_base
= addr_base
;
1349 use
->addr_offset
= addr_offset
;
1351 data
->iv_uses
.safe_push (use
);
1356 /* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
1357 The sub use is recorded under the one whose use id is ID_GROUP. */
/* NOTE(review): incomplete fragment — interior source lines were elided
   by the extraction (numbering jumps 1384->1392).  Records a sub use
   under the group whose use id is ID_GROUP, keeping the sub-use list
   sorted by addr_offset; `pre` below is declared in an elided line.  */
1359 static struct iv_use
*
1360 record_sub_use (struct ivopts_data
*data
, tree
*use_p
,
1361 struct iv
*iv
, gimple
*stmt
, enum use_type use_type
,
1362 tree addr_base
, unsigned HOST_WIDE_INT addr_offset
,
1363 unsigned int id_group
)
1365 struct iv_use
*use
= XCNEW (struct iv_use
);
1366 struct iv_use
*group
= iv_use (data
, id_group
);
/* Sub uses share the id of their group leader.  */
1368 use
->id
= group
->id
;
1370 use
->type
= use_type
;
1374 use
->related_cands
= NULL
;
1375 use
->addr_base
= addr_base
;
1376 use
->addr_offset
= addr_offset
;
1378 /* Sub use list is maintained in offset ascending order. */
1379 if (addr_offset
<= group
->addr_offset
)
/* New use becomes the group head: steal the related_cands bitmap and
   replace the slot in data->iv_uses.  */
1381 use
->related_cands
= group
->related_cands
;
1382 group
->related_cands
= NULL
;
1384 data
->iv_uses
[id_group
] = use
;
1392 group
= group
->next
;
1394 while (group
&& addr_offset
> group
->addr_offset
);
1395 use
->next
= pre
->next
;
1402 /* Checks whether OP is a loop-level invariant and if so, records it.
1403 NONLINEAR_USE is true if the invariant is used in a way we do not
1404 handle specially. */
/* NOTE(review): incomplete fragment — interior lines elided by the
   extraction.  Records OP as a loop invariant if it is an SSA name not
   defined inside the current loop (see comment above).  */
1407 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1410 struct version_info
*info
;
/* Only real (non-virtual) SSA names are candidates.  */
1412 if (TREE_CODE (op
) != SSA_NAME
1413 || virtual_operand_p (op
))
1416 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1418 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1421 info
= name_info (data
, op
);
1423 info
->has_nonlin_use
|= nonlinear_use
;
/* Assign a fresh invariant id lazily (an elided line presumably guards
   this on !info->inv_id — TODO confirm against full source).  */
1425 info
->inv_id
= ++data
->max_inv_id
;
1426 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1429 /* Checks whether the use OP is interesting and if so, records it. */
/* NOTE(review): incomplete fragment — interior lines elided by the
   extraction.  Checks whether SSA name OP has an interesting iv and
   records a USE_NONLINEAR_EXPR use for it (once per iv, via
   have_use_for / use_id).  */
1431 static struct iv_use
*
1432 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1438 if (TREE_CODE (op
) != SSA_NAME
)
1441 iv
= get_iv (data
, op
);
/* Already recorded: return the existing nonlinear use.  */
1445 if (iv
->have_use_for
)
1447 use
= iv_use (data
, iv
->use_id
);
1449 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
/* Zero-step iv is really an invariant, not an interesting use.  */
1453 if (integer_zerop (iv
->step
))
1455 record_invariant (data
, op
, true);
1458 iv
->have_use_for
= true;
1460 stmt
= SSA_NAME_DEF_STMT (op
);
1461 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1462 || is_gimple_assign (stmt
));
1464 use
= record_use (data
, NULL
, iv
, stmt
, USE_NONLINEAR_EXPR
);
1465 iv
->use_id
= use
->id
;
1470 /* Given a condition in statement STMT, checks whether it is a compare
1471 of an induction variable and an invariant. If this is the case,
1472 CONTROL_VAR is set to location of the iv, BOUND to the location of
1473 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1474 induction variable descriptions, and true is returned. If this is not
1475 the case, CONTROL_VAR and BOUND are set to the arguments of the
1476 condition and false is returned. */
/* NOTE(review): incomplete fragment — interior lines elided by the
   extraction.  See the comment above: splits a comparison in STMT into
   an iv operand and an invariant operand.  */
1479 extract_cond_operands (struct ivopts_data
*data
, gimple
*stmt
,
1480 tree
**control_var
, tree
**bound
,
1481 struct iv
**iv_var
, struct iv
**iv_bound
)
1483 /* The objects returned when COND has constant operands. */
1484 static struct iv const_iv
;
1486 tree
*op0
= &zero
, *op1
= &zero
;
1487 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
;
/* A GIMPLE_COND carries the comparison directly; otherwise the
   operands come from a comparison assignment.  */
1490 if (gimple_code (stmt
) == GIMPLE_COND
)
1492 gcond
*cond_stmt
= as_a
<gcond
*> (stmt
);
1493 op0
= gimple_cond_lhs_ptr (cond_stmt
);
1494 op1
= gimple_cond_rhs_ptr (cond_stmt
);
1498 op0
= gimple_assign_rhs1_ptr (stmt
);
1499 op1
= gimple_assign_rhs2_ptr (stmt
);
1502 zero
= integer_zero_node
;
1503 const_iv
.step
= integer_zero_node
;
1505 if (TREE_CODE (*op0
) == SSA_NAME
)
1506 iv0
= get_iv (data
, *op0
);
1507 if (TREE_CODE (*op1
) == SSA_NAME
)
1508 iv1
= get_iv (data
, *op1
);
1510 /* Exactly one of the compared values must be an iv, and the other one must
be loop invariant.  */
1515 if (integer_zerop (iv0
->step
))
1517 /* Control variable may be on the other side. */
1518 std::swap (op0
, op1
);
1519 std::swap (iv0
, iv1
);
1521 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
1536 /* Checks whether the condition in STMT is interesting and if so,
/* NOTE(review): incomplete fragment — interior lines elided by the
   extraction.  If STMT is an iv-vs-invariant compare, record a
   USE_COMPARE use; otherwise record both operands as plain uses.  */
1540 find_interesting_uses_cond (struct ivopts_data
*data
, gimple
*stmt
)
1542 tree
*var_p
, *bound_p
;
1545 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
1547 find_interesting_uses_op (data
, *var_p
);
1548 find_interesting_uses_op (data
, *bound_p
);
1552 record_use (data
, NULL
, var_iv
, stmt
, USE_COMPARE
);
1555 /* Returns the outermost loop EXPR is obviously invariant in
1556 relative to the loop LOOP, i.e. if all its operands are defined
1557 outside of the returned loop. Returns NULL if EXPR is not
1558 even obviously invariant in LOOP. */
/* NOTE(review): incomplete fragment — interior lines elided by the
   extraction.  See the comment above: returns the outermost loop in
   which EXPR is obviously invariant, recursing over operands.  */
1561 outermost_invariant_loop_for_expr (struct loop
*loop
, tree expr
)
/* Constants are invariant everywhere: the whole loop tree root.  */
1566 if (is_gimple_min_invariant (expr
))
1567 return current_loops
->tree_root
;
1569 if (TREE_CODE (expr
) == SSA_NAME
)
1571 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1574 if (flow_bb_inside_loop_p (loop
, def_bb
))
/* Defined inside LOOP: invariant only in strictly deeper subloops.  */
1576 return superloop_at_depth (loop
,
1577 loop_depth (def_bb
->loop_father
) + 1);
1580 return current_loops
->tree_root
;
/* Expression: the answer is bounded by the deepest operand.  */
1586 unsigned maxdepth
= 0;
1587 len
= TREE_OPERAND_LENGTH (expr
);
1588 for (i
= 0; i
< len
; i
++)
1590 struct loop
*ivloop
;
1591 if (!TREE_OPERAND (expr
, i
))
1594 ivloop
= outermost_invariant_loop_for_expr (loop
, TREE_OPERAND (expr
, i
));
1597 maxdepth
= MAX (maxdepth
, loop_depth (ivloop
));
1600 return superloop_at_depth (loop
, maxdepth
);
1603 /* Returns true if expression EXPR is obviously invariant in LOOP,
1604 i.e. if all its operands are defined outside of the LOOP. LOOP
1605 should not be the function body. */
/* NOTE(review): incomplete fragment — the return statements were elided
   by the extraction.  See the comment above: true iff all operands of
   EXPR are defined outside LOOP.  */
1608 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
/* LOOP must not be the function body.  */
1613 gcc_assert (loop_depth (loop
) > 0);
1615 if (is_gimple_min_invariant (expr
))
1618 if (TREE_CODE (expr
) == SSA_NAME
)
1620 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1622 && flow_bb_inside_loop_p (loop
, def_bb
))
/* Recurse over all operands of a compound expression.  */
1631 len
= TREE_OPERAND_LENGTH (expr
);
1632 for (i
= 0; i
< len
; i
++)
1633 if (TREE_OPERAND (expr
, i
)
1634 && !expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1640 /* Given expression EXPR which computes inductive values with respect
1641 to loop recorded in DATA, this function returns biv from which EXPR
1642 is derived by tracing definition chains of ssa variables in EXPR. */
/* NOTE(review): incomplete fragment — interior lines (returns, switch
   header, other cases) elided by the extraction.  See the comment
   above: traces SSA definition chains to find the biv EXPR derives
   from.  */
1645 find_deriving_biv_for_expr (struct ivopts_data
*data
, tree expr
)
1650 enum tree_code code
;
1653 if (expr
== NULL_TREE
)
1656 if (is_gimple_min_invariant (expr
))
1659 code
= TREE_CODE (expr
);
/* For compound expressions, recurse into each operand.  */
1660 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code
)))
1662 n
= TREE_OPERAND_LENGTH (expr
);
1663 for (i
= 0; i
< n
; i
++)
1665 iv
= find_deriving_biv_for_expr (data
, TREE_OPERAND (expr
, i
));
1671 /* Stop if it's not ssa name. */
1672 if (code
!= SSA_NAME
)
1675 iv
= get_iv (data
, expr
);
1676 if (!iv
|| integer_zerop (iv
->step
))
1681 stmt
= SSA_NAME_DEF_STMT (expr
);
1682 if (gphi
*phi
= dyn_cast
<gphi
*> (stmt
))
1685 use_operand_p use_p
;
1687 if (virtual_operand_p (gimple_phi_result (phi
)))
/* Recurse into every PHI argument.  */
1690 FOR_EACH_PHI_ARG (use_p
, phi
, iter
, SSA_OP_USE
)
1692 tree use
= USE_FROM_PTR (use_p
);
1693 iv
= find_deriving_biv_for_expr (data
, use
);
1699 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1702 e1
= gimple_assign_rhs1 (stmt
);
1703 code
= gimple_assign_rhs_code (stmt
);
1704 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
)
1705 return find_deriving_biv_for_expr (data
, e1
);
1712 case POINTER_PLUS_EXPR
:
1713 /* Increments, decrements and multiplications by a constant
are simple.  */
1715 e2
= gimple_assign_rhs2 (stmt
);
1716 iv
= find_deriving_biv_for_expr (data
, e2
);
1722 /* Casts are simple. */
1723 return find_deriving_biv_for_expr (data
, e1
);
1732 /* Record BIV, its predecessor and successor that they are used in
1733 address type uses. */
/* NOTE(review): incomplete fragment — interior lines elided by the
   extraction.  See the comment above: marks BIV (and bivs that are its
   step-successor/predecessor) as used in address type uses.  */
1736 record_biv_for_address_use (struct ivopts_data
*data
, struct iv
*biv
)
1739 tree type
, base_1
, base_2
;
/* Only wrap-free integral bivs not already marked qualify.  */
1742 if (!biv
|| !biv
->biv_p
|| integer_zerop (biv
->step
)
1743 || biv
->have_address_use
|| !biv
->no_overflow
)
1746 type
= TREE_TYPE (biv
->base
);
1747 if (!INTEGRAL_TYPE_P (type
))
1750 biv
->have_address_use
= true;
1751 data
->bivs_not_used_in_addr
--;
/* base_1 is BIV's value after one step; used to detect neighbors.  */
1752 base_1
= fold_build2 (PLUS_EXPR
, type
, biv
->base
, biv
->step
);
1753 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1755 struct iv
*iv
= ver_info (data
, i
)->iv
;
1757 if (!iv
|| !iv
->biv_p
|| integer_zerop (iv
->step
)
1758 || iv
->have_address_use
|| !iv
->no_overflow
)
1761 if (type
!= TREE_TYPE (iv
->base
)
1762 || !INTEGRAL_TYPE_P (TREE_TYPE (iv
->base
)))
1765 if (!operand_equal_p (biv
->step
, iv
->step
, 0))
1768 base_2
= fold_build2 (PLUS_EXPR
, type
, iv
->base
, iv
->step
);
/* IV is BIV stepped once, or BIV is IV stepped once: mark it too.  */
1769 if (operand_equal_p (base_1
, iv
->base
, 0)
1770 || operand_equal_p (base_2
, biv
->base
, 0))
1772 iv
->have_address_use
= true;
1773 data
->bivs_not_used_in_addr
--;
1778 /* Cumulates the steps of indices into DATA and replaces their values with the
1779 initial ones. Returns false when the value of the index cannot be determined.
1780 Callback for for_each_index. */
/* NOTE(review): incomplete fragment — the extraction elided the
   remaining members (the code below also uses dta->stmt and
   dta->step).  Callback state for idx_find_step / for_each_index.  */
1782 struct ifs_ivopts_data
1784 struct ivopts_data
*ivopts_data
;
/* NOTE(review): incomplete fragment — interior lines elided by the
   extraction.  See the comment above: for_each_index callback that
   accumulates index steps into dta->step and replaces index values with
   their initial ones.  */
1790 idx_find_step (tree base
, tree
*idx
, void *data
)
1792 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1794 bool use_overflow_semantics
= false;
1795 tree step
, iv_base
, iv_step
, lbound
, off
;
1796 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1798 /* If base is a component ref, require that the offset of the reference
be invariant.  */
1800 if (TREE_CODE (base
) == COMPONENT_REF
)
1802 off
= component_ref_field_offset (base
);
1803 return expr_invariant_in_loop_p (loop
, off
);
1806 /* If base is array, first check whether we will be able to move the
1807 reference out of the loop (in order to take its address in strength
1808 reduction). In order for this to work we need both lower bound
1809 and step to be loop invariants. */
1810 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1812 /* Moreover, for a range, the size needs to be invariant as well. */
1813 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1814 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1817 step
= array_ref_element_size (base
);
1818 lbound
= array_ref_low_bound (base
);
1820 if (!expr_invariant_in_loop_p (loop
, step
)
1821 || !expr_invariant_in_loop_p (loop
, lbound
))
1825 if (TREE_CODE (*idx
) != SSA_NAME
)
1828 iv
= get_iv (dta
->ivopts_data
, *idx
);
1832 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1833 *&x[0], which is not folded and does not trigger the
1834 ARRAY_REF path below. */
1837 if (integer_zerop (iv
->step
))
1840 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1842 step
= array_ref_element_size (base
);
1844 /* We only handle addresses whose step is an integer constant. */
1845 if (TREE_CODE (step
) != INTEGER_CST
)
1849 /* The step for pointer arithmetics already is 1 byte. */
1850 step
= size_one_node
;
1854 if (iv
->no_overflow
&& nowrap_type_p (TREE_TYPE (iv_step
)))
1855 use_overflow_semantics
= true;
/* Convert the iv to sizetype; failure means the index might wrap.  */
1857 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1858 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1859 use_overflow_semantics
))
1861 /* The index might wrap. */
/* Accumulate this index's contribution into the overall step.  */
1865 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1866 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
1868 if (dta
->ivopts_data
->bivs_not_used_in_addr
)
1871 iv
= find_deriving_biv_for_expr (dta
->ivopts_data
, iv
->ssa_name
);
1873 record_biv_for_address_use (dta
->ivopts_data
, iv
);
1878 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1879 object is passed to it in DATA. */
/* NOTE(review): incomplete fragment — interior lines elided by the
   extraction (the vdata parameter line is missing).  for_each_index
   callback: records the index, and for array refs also the element
   size and low bound, as interesting uses.  */
1882 idx_record_use (tree base
, tree
*idx
,
1885 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
1886 find_interesting_uses_op (data
, *idx
);
1887 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1889 find_interesting_uses_op (data
, array_ref_element_size (base
));
1890 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1895 /* If we can prove that TOP = cst * BOT for some constant cst,
1896 store cst to MUL and return true. Otherwise return false.
1897 The returned value is always sign-extended, regardless of the
1898 signedness of TOP and BOT. */
/* NOTE(review): incomplete fragment — the switch header, case labels
   and returns were elided by the extraction.  See the comment above:
   proves TOP = cst * BOT and stores cst (sign-extended) to MUL.  */
1901 constant_multiple_of (tree top
, tree bot
, widest_int
*mul
)
1904 enum tree_code code
;
1905 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
1906 widest_int res
, p0
, p1
;
/* TOP == BOT gives multiple 1 (result set in an elided line).  */
1911 if (operand_equal_p (top
, bot
, 0))
1917 code
= TREE_CODE (top
);
/* MULT case: constant multiplier times a multiple.  */
1921 mby
= TREE_OPERAND (top
, 1);
1922 if (TREE_CODE (mby
) != INTEGER_CST
)
1925 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
1928 *mul
= wi::sext (res
* wi::to_widest (mby
), precision
);
/* PLUS/MINUS case: both operands must be multiples of BOT.  */
1933 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
1934 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
1937 if (code
== MINUS_EXPR
)
1939 *mul
= wi::sext (p0
+ p1
, precision
);
/* INTEGER_CST case: exact truncating division must have zero rest
   (the remainder check is in an elided line).  */
1943 if (TREE_CODE (bot
) != INTEGER_CST
)
1946 p0
= widest_int::from (top
, SIGNED
);
1947 p1
= widest_int::from (bot
, SIGNED
);
1950 *mul
= wi::sext (wi::divmod_trunc (p0
, p1
, SIGNED
, &res
), precision
);
1958 /* Return true if memory reference REF with step STEP may be unaligned. */
/* NOTE(review): incomplete fragment — returns elided by the
   extraction.  See the comment above: true if REF with STEP may be
   misaligned for its mode.  */
1961 may_be_unaligned_p (tree ref
, tree step
)
1963 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1964 thus they are not misaligned. */
1965 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
/* Required alignment: the larger of type and mode alignment.  */
1968 unsigned int align
= TYPE_ALIGN (TREE_TYPE (ref
));
1969 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
))) > align
)
1970 align
= GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
)));
1972 unsigned HOST_WIDE_INT bitpos
;
1973 unsigned int ref_align
;
1974 get_object_alignment_1 (ref
, &ref_align
, &bitpos
);
1975 if (ref_align
< align
1976 || (bitpos
% align
) != 0
1977 || (bitpos
% BITS_PER_UNIT
) != 0)
/* The step must also preserve alignment: check its trailing zeros.  */
1980 unsigned int trailing_zeros
= tree_ctz (step
);
1981 if (trailing_zeros
< HOST_BITS_PER_INT
1982 && (1U << trailing_zeros
) * BITS_PER_UNIT
< align
)
1988 /* Return true if EXPR may be non-addressable. */
/* NOTE(review): incomplete fragment — several case labels and returns
   elided by the extraction.  See the comment above: true if EXPR may
   be non-addressable.  */
1991 may_be_nonaddressable_p (tree expr
)
1993 switch (TREE_CODE (expr
))
1995 case TARGET_MEM_REF
:
1996 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1997 target, thus they are always addressable. */
/* COMPONENT_REF case (label elided): non-addressable field or base.  */
2001 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
2002 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
2004 case VIEW_CONVERT_EXPR
:
2005 /* This kind of view-conversions may wrap non-addressable objects
2006 and make them look addressable. After some processing the
2007 non-addressability may be uncovered again, causing ADDR_EXPRs
2008 of inappropriate objects to be built. */
2009 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
2010 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
2013 /* ... fall through ... */
2016 case ARRAY_RANGE_REF
:
2017 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
/* Forward declaration; defined near the end of this chunk.  */
2030 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
);
2032 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
2033 If there is an existing use which has same stripped iv base and step,
2034 this function records this one as a sub use to that; otherwise records
2035 it as a normal one. */
/* NOTE(review): incomplete fragment — interior lines elided by the
   extraction.  See the comment above: records the use as a sub use of
   an existing group with the same stripped base/step, else as a new
   use.  */
2037 static struct iv_use
*
2038 record_group_use (struct ivopts_data
*data
, tree
*use_p
,
2039 struct iv
*iv
, gimple
*stmt
, enum use_type use_type
)
2044 unsigned HOST_WIDE_INT addr_offset
;
2046 /* Only support sub use for address type uses, that is, with base
object.  */
2048 if (!iv
->base_object
)
2049 return record_use (data
, use_p
, iv
, stmt
, use_type
);
2051 addr_base
= strip_offset (iv
->base
, &addr_offset
);
/* Linear scan for a matching existing address group.  */
2052 for (i
= 0; i
< n_iv_uses (data
); i
++)
2054 use
= iv_use (data
, i
);
2055 if (use
->type
!= USE_ADDRESS
|| !use
->iv
->base_object
)
2058 /* Check if it has the same stripped base and step. */
2059 if (operand_equal_p (iv
->base_object
, use
->iv
->base_object
, 0)
2060 && operand_equal_p (iv
->step
, use
->iv
->step
, 0)
2061 && operand_equal_p (addr_base
, use
->addr_base
, 0))
2065 if (i
== n_iv_uses (data
))
2066 return record_use (data
, use_p
, iv
, stmt
,
2067 use_type
, addr_base
, addr_offset
);
2069 return record_sub_use (data
, use_p
, iv
, stmt
,
2070 use_type
, addr_base
, addr_offset
, i
);
2073 /* Finds addresses in *OP_P inside STMT. */
/* NOTE(review): incomplete fragment — many interior lines (returns,
   `goto fail`-style exits, local declarations) elided by the
   extraction.  Finds addresses in *OP_P inside STMT and records them
   as USE_ADDRESS group uses.  */
2076 find_interesting_uses_address (struct ivopts_data
*data
, gimple
*stmt
,
2079 tree base
= *op_p
, step
= size_zero_node
;
2081 struct ifs_ivopts_data ifs_ivopts_data
;
2083 /* Do not play with volatile memory references. A bit too conservative,
2084 perhaps, but safe. */
2085 if (gimple_has_volatile_ops (stmt
))
2088 /* Ignore bitfields for now. Not really something terribly complicated
to handle.  TODO.  */
2090 if (TREE_CODE (base
) == BIT_FIELD_REF
)
2093 base
= unshare_expr (base
);
/* TARGET_MEM_REF: substitute iv bases into all iv-valued parts and
   build the combined step.  */
2095 if (TREE_CODE (base
) == TARGET_MEM_REF
)
2097 tree type
= build_pointer_type (TREE_TYPE (base
));
2101 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
2103 civ
= get_iv (data
, TMR_BASE (base
));
2107 TMR_BASE (base
) = civ
->base
;
2110 if (TMR_INDEX2 (base
)
2111 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
2113 civ
= get_iv (data
, TMR_INDEX2 (base
));
2117 TMR_INDEX2 (base
) = civ
->base
;
2120 if (TMR_INDEX (base
)
2121 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
2123 civ
= get_iv (data
, TMR_INDEX (base
));
2127 TMR_INDEX (base
) = civ
->base
;
2132 if (TMR_STEP (base
))
2133 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
2135 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
2139 if (integer_zerop (step
))
2141 base
= tree_mem_ref_addr (type
, base
);
/* Ordinary reference: accumulate index steps via idx_find_step.  */
2145 ifs_ivopts_data
.ivopts_data
= data
;
2146 ifs_ivopts_data
.stmt
= stmt
;
2147 ifs_ivopts_data
.step
= size_zero_node
;
2148 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
2149 || integer_zerop (ifs_ivopts_data
.step
))
2151 step
= ifs_ivopts_data
.step
;
2153 /* Check that the base expression is addressable. This needs
2154 to be done after substituting bases of IVs into it. */
2155 if (may_be_nonaddressable_p (base
))
2158 /* Moreover, on strict alignment platforms, check that it is
2159 sufficiently aligned. */
2160 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
2163 base
= build_fold_addr_expr (base
);
2165 /* Substituting bases of IVs into the base expression might
2166 have caused folding opportunities. */
2167 if (TREE_CODE (base
) == ADDR_EXPR
)
2169 tree
*ref
= &TREE_OPERAND (base
, 0);
2170 while (handled_component_p (*ref
))
2171 ref
= &TREE_OPERAND (*ref
, 0);
2172 if (TREE_CODE (*ref
) == MEM_REF
)
2174 tree tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
2175 TREE_OPERAND (*ref
, 0),
2176 TREE_OPERAND (*ref
, 1));
2183 civ
= alloc_iv (data
, base
, step
);
2184 record_group_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
/* Fail path (label elided): still record uses of all indices.  */
2188 for_each_index (op_p
, idx_record_use
, data
);
2191 /* Finds and records invariants used in STMT. */
/* NOTE(review): incomplete fragment — interior lines elided by the
   extraction.  Records every SSA use operand of STMT as a (possible)
   invariant.  */
2194 find_invariants_stmt (struct ivopts_data
*data
, gimple
*stmt
)
2197 use_operand_p use_p
;
2200 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2202 op
= USE_FROM_PTR (use_p
);
2203 record_invariant (data
, op
, false);
2207 /* Finds interesting uses of induction variables in the statement STMT. */
/* NOTE(review): incomplete fragment — returns and some branches elided
   by the extraction.  Dispatches on the statement kind and records the
   interesting iv uses found in STMT.  */
2210 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple
*stmt
)
2213 tree op
, *lhs
, *rhs
;
2215 use_operand_p use_p
;
2216 enum tree_code code
;
2218 find_invariants_stmt (data
, stmt
);
2220 if (gimple_code (stmt
) == GIMPLE_COND
)
2222 find_interesting_uses_cond (data
, stmt
);
2226 if (is_gimple_assign (stmt
))
2228 lhs
= gimple_assign_lhs_ptr (stmt
);
2229 rhs
= gimple_assign_rhs1_ptr (stmt
);
2231 if (TREE_CODE (*lhs
) == SSA_NAME
)
2233 /* If the statement defines an induction variable, the uses are not
2234 interesting by themselves. */
2236 iv
= get_iv (data
, *lhs
);
2238 if (iv
&& !integer_zerop (iv
->step
))
2242 code
= gimple_assign_rhs_code (stmt
);
2243 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
2244 && (REFERENCE_CLASS_P (*rhs
)
2245 || is_gimple_val (*rhs
)))
2247 if (REFERENCE_CLASS_P (*rhs
))
2248 find_interesting_uses_address (data
, stmt
, rhs
);
2250 find_interesting_uses_op (data
, *rhs
);
2252 if (REFERENCE_CLASS_P (*lhs
))
2253 find_interesting_uses_address (data
, stmt
, lhs
);
2256 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
2258 find_interesting_uses_cond (data
, stmt
);
2262 /* TODO -- we should also handle address uses of type
2264 memory = call (whatever);
and call (memory).  */
/* Loop-header PHI defining an iv: its arguments are not interesting.  */
2271 if (gimple_code (stmt
) == GIMPLE_PHI
2272 && gimple_bb (stmt
) == data
->current_loop
->header
)
2274 iv
= get_iv (data
, PHI_RESULT (stmt
))
;
2276 if (iv
&& !integer_zerop (iv
->step
))
/* Otherwise record every SSA use operand that has an iv.  */
2280 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2282 op
= USE_FROM_PTR (use_p
);
2284 if (TREE_CODE (op
) != SSA_NAME
)
2287 iv
= get_iv (data
, op
);
2291 find_interesting_uses_op (data
, op
);
2295 /* Finds interesting uses of induction variables outside of loops
2296 on loop exit edge EXIT. */
/* NOTE(review): incomplete fragment — interior lines elided by the
   extraction.  For each PHI in the exit destination, records the value
   flowing over EXIT as an interesting use.  */
2299 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
2305 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
2308 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
2309 if (!virtual_operand_p (def
))
2310 find_interesting_uses_op (data
, def
);
2314 /* Finds uses of the induction variables that are interesting. */
/* NOTE(review): incomplete fragment — interior lines elided by the
   extraction.  Walks every block of the current loop, recording uses
   in PHIs and statements (plus uses on loop-exiting edges), then dumps
   the invariants when details dumping is enabled.  */
2317 find_interesting_uses (struct ivopts_data
*data
)
2320 gimple_stmt_iterator bsi
;
2321 basic_block
*body
= get_loop_body (data
->current_loop
);
2323 struct version_info
*info
;
2326 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2327 fprintf (dump_file
, "Uses:\n\n");
2329 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
/* Edges leaving the loop feed find_interesting_uses_outside.  */
2334 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2335 if (e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)
2336 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
2337 find_interesting_uses_outside (data
, e
);
2339 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2340 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2341 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2342 if (!is_gimple_debug (gsi_stmt (bsi
)))
2343 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2346 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2350 fprintf (dump_file
, "\n");
2352 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2354 info
= ver_info (data
, i
);
2357 fprintf (dump_file
, " ");
2358 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
2359 fprintf (dump_file
, " is invariant (%d)%s\n",
2360 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
2364 fprintf (dump_file
, "\n");
2370 /* Compute maximum offset of [base + offset] addressing mode
2371 for memory reference represented by USE. */
/* NOTE(review): incomplete fragment — interior lines elided by the
   extraction.  See the comment above: probes the target for the
   largest [base + offset] offset valid for USE's mode/address space,
   caching results per (address space, mode) in max_offset_list.  */
2373 static HOST_WIDE_INT
2374 compute_max_addr_offset (struct iv_use
*use
)
2378 HOST_WIDE_INT i
, off
;
2379 unsigned list_index
, num
;
2381 machine_mode mem_mode
, addr_mode
;
/* Persistent cache across calls, keyed by (as, mem_mode).  */
2382 static vec
<HOST_WIDE_INT
> max_offset_list
;
2384 as
= TYPE_ADDR_SPACE (TREE_TYPE (use
->iv
->base
));
2385 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2387 num
= max_offset_list
.length ();
2388 list_index
= (unsigned) as
* MAX_MACHINE_MODE
+ (unsigned) mem_mode
;
2389 if (list_index
>= num
)
/* Grow the cache and mark the fresh slots as "not computed" (-1).  */
2391 max_offset_list
.safe_grow (list_index
+ MAX_MACHINE_MODE
);
2392 for (; num
< max_offset_list
.length (); num
++)
2393 max_offset_list
[num
] = -1;
2396 off
= max_offset_list
[list_index
];
/* Cache miss: probe (reg + const) addresses of decreasing width.  */
2400 addr_mode
= targetm
.addr_space
.address_mode (as
);
2401 reg
= gen_raw_REG (addr_mode
, LAST_VIRTUAL_REGISTER
+ 1);
2402 addr
= gen_rtx_fmt_ee (PLUS
, addr_mode
, reg
, NULL_RTX
);
2404 width
= GET_MODE_BITSIZE (addr_mode
) - 1;
2405 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
2406 width
= HOST_BITS_PER_WIDE_INT
- 1;
2408 for (i
= width
; i
> 0; i
--)
2410 off
= ((unsigned HOST_WIDE_INT
) 1 << i
) - 1;
2411 XEXP (addr
, 1) = gen_int_mode (off
, addr_mode
);
2412 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
2415 /* For some strict-alignment targets, the offset must be naturally
2416 aligned. Try an aligned offset if mem_mode is not QImode. */
2417 off
= ((unsigned HOST_WIDE_INT
) 1 << i
);
2418 if (off
> GET_MODE_SIZE (mem_mode
) && mem_mode
!= QImode
)
2420 off
-= GET_MODE_SIZE (mem_mode
);
2421 XEXP (addr
, 1) = gen_int_mode (off
, addr_mode
);
2422 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
2429 max_offset_list
[list_index
] = off
;
2433 /* Check if all small groups should be split. Return true if and
2436 1) At least one groups contain two uses with different offsets.
2437 2) No group contains more than two uses with different offsets.
2439 Return false otherwise. We want to split such groups because:
2441 1) Small groups don't have much benefit and may interfer with
2442 general candidate selection.
2443 2) Size for problem with only small groups is usually small and
2444 general algorithm can handle it well.
2446 TODO -- Above claim may not hold when auto increment is supported. */
/* NOTE(review): incomplete fragment — interior lines (the distinct-
   offset counting and the final decision) were elided by the
   extraction.  See the comment above for the split criteria.  */
2449 split_all_small_groups (struct ivopts_data
*data
)
2451 bool split_p
= false;
2452 unsigned int i
, n
, distinct
;
2453 struct iv_use
*pre
, *use
;
2455 n
= n_iv_uses (data
);
2456 for (i
= 0; i
< n
; i
++)
2458 use
= iv_use (data
, i
);
2463 gcc_assert (use
->type
== USE_ADDRESS
);
/* Walk the sub-use chain of the group, comparing adjacent offsets.  */
2464 for (pre
= use
, use
= use
->next
; use
; pre
= use
, use
= use
->next
)
2466 if (pre
->addr_offset
!= use
->addr_offset
)
2479 /* For each group of address type uses, this function further groups
2480 these uses according to the maximum offset supported by target's
2481 [base + offset] addressing mode. */
/* NOTE(review): incomplete fragment — interior lines elided by the
   extraction (e.g. where sub_id is reset and the list is broken).  See
   the comment above: regroups address uses by the target's maximum
   [base + offset] offset.  */
2484 group_address_uses (struct ivopts_data
*data
)
2486 HOST_WIDE_INT max_offset
= -1;
2487 unsigned int i
, n
, sub_id
;
2488 struct iv_use
*pre
, *use
;
2489 unsigned HOST_WIDE_INT addr_offset_first
;
2491 /* Reset max offset to split all small groups. */
2492 if (split_all_small_groups (data
))
2495 n
= n_iv_uses (data
);
2496 for (i
= 0; i
< n
; i
++)
2498 use
= iv_use (data
, i
);
2502 gcc_assert (use
->type
== USE_ADDRESS
);
2503 if (max_offset
!= 0)
2504 max_offset
= compute_max_addr_offset (use
);
2509 addr_offset_first
= use
->addr_offset
;
2510 /* Only uses with offset that can fit in offset part against
2511 the first use can be grouped together. */
2512 for (pre
= use
, use
= use
->next
;
2513 use
&& (use
->addr_offset
- addr_offset_first
2514 <= (unsigned HOST_WIDE_INT
) max_offset
);
2515 pre
= use
, use
= use
->next
)
2518 use
->sub_id
= ++sub_id
;
2521 /* Break the list and create new group. */
2525 use
->id
= n_iv_uses (data
);
2526 use
->related_cands
= BITMAP_ALLOC (NULL
);
2527 data
->iv_uses
.safe_push (use
);
2532 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2533 dump_uses (dump_file
, data
);
2536 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2537 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2538 we are at the top-level of the processed address. */
/* NOTE(review): incomplete fragment — the switch header, several case
   labels, returns and brace lines were elided by the extraction.  See
   the comment above: recursively strips constant offsets from EXPR,
   accumulating them into *OFFSET.  */
2541 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
2542 HOST_WIDE_INT
*offset
)
2544 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
2545 enum tree_code code
;
2546 tree type
, orig_type
= TREE_TYPE (expr
);
2547 HOST_WIDE_INT off0
, off1
, st
;
2548 tree orig_expr
= expr
;
2552 type
= TREE_TYPE (expr
);
2553 code
= TREE_CODE (expr
);
/* INTEGER_CST case (label elided): the whole constant is the offset.  */
2559 if (!cst_and_fits_in_hwi (expr
)
2560 || integer_zerop (expr
))
2563 *offset
= int_cst_value (expr
);
2564 return build_int_cst (orig_type
, 0);
2566 case POINTER_PLUS_EXPR
:
2569 op0
= TREE_OPERAND (expr
, 0);
2570 op1
= TREE_OPERAND (expr
, 1);
2572 op0
= strip_offset_1 (op0
, false, false, &off0
);
2573 op1
= strip_offset_1 (op1
, false, false, &off1
);
2575 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
2576 if (op0
== TREE_OPERAND (expr
, 0)
2577 && op1
== TREE_OPERAND (expr
, 1))
2580 if (integer_zerop (op1
))
2582 else if (integer_zerop (op0
))
2584 if (code
== MINUS_EXPR
)
2585 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
2590 expr
= fold_build2 (code
, type
, op0
, op1
);
2592 return fold_convert (orig_type
, expr
);
/* MULT case (label elided): distribute over a constant multiplier.  */
2595 op1
= TREE_OPERAND (expr
, 1);
2596 if (!cst_and_fits_in_hwi (op1
))
2599 op0
= TREE_OPERAND (expr
, 0);
2600 op0
= strip_offset_1 (op0
, false, false, &off0
);
2601 if (op0
== TREE_OPERAND (expr
, 0))
2604 *offset
= off0
* int_cst_value (op1
);
2605 if (integer_zerop (op0
))
2608 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
2610 return fold_convert (orig_type
, expr
);
2613 case ARRAY_RANGE_REF
:
2617 step
= array_ref_element_size (expr
);
2618 if (!cst_and_fits_in_hwi (step
))
2621 st
= int_cst_value (step
);
2622 op1
= TREE_OPERAND (expr
, 1);
2623 op1
= strip_offset_1 (op1
, false, false, &off1
);
2624 *offset
= off1
* st
;
2627 && integer_zerop (op1
))
2629 /* Strip the component reference completely. */
2630 op0
= TREE_OPERAND (expr
, 0);
2631 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
/* COMPONENT_REF case (label elided): fold constant field offsets.  */
2644 tmp
= component_ref_field_offset (expr
);
2645 field
= TREE_OPERAND (expr
, 1);
2647 && cst_and_fits_in_hwi (tmp
)
2648 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field
)))
2650 HOST_WIDE_INT boffset
, abs_off
;
2652 /* Strip the component reference completely. */
2653 op0
= TREE_OPERAND (expr
, 0);
2654 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2655 boffset
= int_cst_value (DECL_FIELD_BIT_OFFSET (field
));
2656 abs_off
= abs_hwi (boffset
) / BITS_PER_UNIT
;
2660 *offset
= off0
+ int_cst_value (tmp
) + abs_off
;
/* ADDR_EXPR case (label elided): recurse with inside_addr set.  */
2667 op0
= TREE_OPERAND (expr
, 0);
2668 op0
= strip_offset_1 (op0
, true, true, &off0
);
2671 if (op0
== TREE_OPERAND (expr
, 0))
2674 expr
= build_fold_addr_expr (op0
);
2675 return fold_convert (orig_type
, expr
);
2678 /* ??? Offset operand? */
2679 inside_addr
= false;
2686 /* Default handling of expressions for that we want to recurse into
2687 the first operand. */
2688 op0
= TREE_OPERAND (expr
, 0);
2689 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2692 if (op0
== TREE_OPERAND (expr
, 0)
2693 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2696 expr
= copy_node (expr
);
2697 TREE_OPERAND (expr
, 0) = op0
;
2699 TREE_OPERAND (expr
, 1) = op1
;
2701 /* Inside address, we might strip the top level component references,
2702 thus changing type of the expression. Handling of ADDR_EXPR
(rest of comment truncated by extraction) */
2704 expr
= fold_convert (orig_type
, expr
);
2709 /* Strips constant offsets from EXPR and stores them to OFFSET. */
/* NOTE(review): incomplete fragment — the lines storing *OFFSET and
   returning `core` were elided by the extraction.  Thin wrapper around
   strip_offset_1 with an unsigned OFFSET out-parameter.  */
2712 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
2715 tree core
= strip_offset_1 (expr
, false, false, &off
);
2720 /* Returns variant of TYPE that can be used as base for different uses.
2721 We return unsigned type with the same precision, which avoids problems
with overflows.  */
/* NOTE(review): incomplete fragment — the branch returning TYPE itself
   for unsigned types was elided by the extraction.  */
2725 generic_type_for (tree type
)
2727 if (POINTER_TYPE_P (type
))
2728 return unsigned_type_for (type
);
2730 if (TYPE_UNSIGNED (type
))
2733 return unsigned_type_for (type
);
2736 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2737 the bitmap to that we should store it. */
2739 static struct ivopts_data
*fd_ivopts_data
;
2741 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2743 bitmap
*depends_on
= (bitmap
*) data
;
2744 struct version_info
*info
;
2746 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2748 info
= name_info (fd_ivopts_data
, *expr_p
);
2750 if (!info
->inv_id
|| info
->has_nonlin_use
)
2754 *depends_on
= BITMAP_ALLOC (NULL
);
2755 bitmap_set_bit (*depends_on
, info
->inv_id
);
2760 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2761 position to POS. If USE is not NULL, the candidate is set as related to
2762 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2763 replacement of the final value of the iv by a direct computation. */
/* NOTE(review): incomplete fragment — interior lines (brace lines,
   several cand field assignments, loop continues) were elided by the
   extraction.  See the comment above: adds (or reuses) a candidate
   BASE + STEP * i at position POS.  */
2765 static struct iv_cand
*
2766 add_candidate_1 (struct ivopts_data
*data
,
2767 tree base
, tree step
, bool important
, enum iv_position pos
,
2768 struct iv_use
*use
, gimple
*incremented_at
,
2769 struct iv
*orig_iv
= NULL
)
2772 struct iv_cand
*cand
= NULL
;
2773 tree type
, orig_type
;
2775 /* For non-original variables, make sure their values are computed in a type
2776 that does not invoke undefined behavior on overflows (since in general,
2777 we cannot prove that these induction variables are non-wrapping). */
2778 if (pos
!= IP_ORIGINAL
)
2780 orig_type
= TREE_TYPE (base
);
2781 type
= generic_type_for (orig_type
);
2782 if (type
!= orig_type
)
2784 base
= fold_convert (type
, base
);
2785 step
= fold_convert (type
, step
);
/* Look for an existing equivalent candidate before creating one.  */
2789 for (i
= 0; i
< n_iv_cands (data
); i
++)
2791 cand
= iv_cand (data
, i
);
2793 if (cand
->pos
!= pos
)
2796 if (cand
->incremented_at
!= incremented_at
2797 || ((pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2798 && cand
->ainc_use
!= use
))
2812 if (operand_equal_p (base
, cand
->iv
->base
, 0)
2813 && operand_equal_p (step
, cand
->iv
->step
, 0)
2814 && (TYPE_PRECISION (TREE_TYPE (base
))
2815 == TYPE_PRECISION (TREE_TYPE (cand
->iv
->base
))))
/* No match: allocate and register a new candidate.  */
2819 if (i
== n_iv_cands (data
))
2821 cand
= XCNEW (struct iv_cand
);
2827 cand
->iv
= alloc_iv (data
, base
, step
);
2830 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2832 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2833 cand
->var_after
= cand
->var_before
;
2835 cand
->important
= important
;
2836 cand
->incremented_at
= incremented_at
;
2837 data
->iv_candidates
.safe_push (cand
);
/* Non-constant steps create dependencies on invariants.  */
2840 && TREE_CODE (step
) != INTEGER_CST
)
2842 fd_ivopts_data
= data
;
2843 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2846 if (pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2847 cand
->ainc_use
= use
;
2849 cand
->ainc_use
= NULL
;
2851 cand
->orig_iv
= orig_iv
;
2852 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2853 dump_cand (dump_file
, cand
);
/* An existing candidate may be promoted to important.  */
2856 if (important
&& !cand
->important
)
2858 cand
->important
= true;
2859 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2860 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
/* Relate the candidate to USE when one was supplied.  */
2865 bitmap_set_bit (use
->related_cands
, i
);
2866 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2867 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2874 /* Returns true if incrementing the induction variable at the end of the LOOP
2877 The purpose is to avoid splitting latch edge with a biv increment, thus
2878 creating a jump, possibly confusing other optimization passes and leaving
2879 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2880 is not available (so we do not have a better alternative), or if the latch
2881 edge is already nonempty. */
2884 allow_ip_end_pos_p (struct loop
*loop
)
2886 if (!ip_normal_pos (loop
))
2889 if (!empty_block_p (ip_end_pos (loop
)))
2895 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2896 Important field is set to IMPORTANT. */
2899 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
2900 bool important
, struct iv_use
*use
)
2902 basic_block use_bb
= gimple_bb (use
->stmt
);
2903 machine_mode mem_mode
;
2904 unsigned HOST_WIDE_INT cstepi
;
2906 /* If we insert the increment in any position other than the standard
2907 ones, we must ensure that it is incremented once per iteration.
2908 It must not be in an inner nested loop, or one side of an if
2910 if (use_bb
->loop_father
!= data
->current_loop
2911 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
2912 || stmt_could_throw_p (use
->stmt
)
2913 || !cst_and_fits_in_hwi (step
))
2916 cstepi
= int_cst_value (step
);
2918 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2919 if (((USE_LOAD_PRE_INCREMENT (mem_mode
)
2920 || USE_STORE_PRE_INCREMENT (mem_mode
))
2921 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2922 || ((USE_LOAD_PRE_DECREMENT (mem_mode
)
2923 || USE_STORE_PRE_DECREMENT (mem_mode
))
2924 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2926 enum tree_code code
= MINUS_EXPR
;
2928 tree new_step
= step
;
2930 if (POINTER_TYPE_P (TREE_TYPE (base
)))
2932 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
2933 code
= POINTER_PLUS_EXPR
;
2936 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
2937 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
2938 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
2941 if (((USE_LOAD_POST_INCREMENT (mem_mode
)
2942 || USE_STORE_POST_INCREMENT (mem_mode
))
2943 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2944 || ((USE_LOAD_POST_DECREMENT (mem_mode
)
2945 || USE_STORE_POST_DECREMENT (mem_mode
))
2946 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2948 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
2953 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2954 position to POS. If USE is not NULL, the candidate is set as related to
2955 it. The candidate computation is scheduled before exit condition and at
2959 add_candidate (struct ivopts_data
*data
,
2960 tree base
, tree step
, bool important
, struct iv_use
*use
,
2961 struct iv
*orig_iv
= NULL
)
2963 gcc_assert (use
== NULL
|| use
->sub_id
== 0);
2965 if (ip_normal_pos (data
->current_loop
))
2966 add_candidate_1 (data
, base
, step
, important
,
2967 IP_NORMAL
, use
, NULL
, orig_iv
);
2968 if (ip_end_pos (data
->current_loop
)
2969 && allow_ip_end_pos_p (data
->current_loop
))
2970 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
, orig_iv
);
2973 /* Adds standard iv candidates. */
2976 add_standard_iv_candidates (struct ivopts_data
*data
)
2978 add_candidate (data
, integer_zero_node
, integer_one_node
, true, NULL
);
2980 /* The same for a double-integer type if it is still fast enough. */
2982 (long_integer_type_node
) > TYPE_PRECISION (integer_type_node
)
2983 && TYPE_PRECISION (long_integer_type_node
) <= BITS_PER_WORD
)
2984 add_candidate (data
, build_int_cst (long_integer_type_node
, 0),
2985 build_int_cst (long_integer_type_node
, 1), true, NULL
);
2987 /* The same for a double-integer type if it is still fast enough. */
2989 (long_long_integer_type_node
) > TYPE_PRECISION (long_integer_type_node
)
2990 && TYPE_PRECISION (long_long_integer_type_node
) <= BITS_PER_WORD
)
2991 add_candidate (data
, build_int_cst (long_long_integer_type_node
, 0),
2992 build_int_cst (long_long_integer_type_node
, 1), true, NULL
);
2996 /* Adds candidates bases on the old induction variable IV. */
2999 add_iv_candidate_for_biv (struct ivopts_data
*data
, struct iv
*iv
)
3003 struct iv_cand
*cand
;
3005 /* Check if this biv is used in address type use. */
3006 if (iv
->no_overflow
&& iv
->have_address_use
3007 && INTEGRAL_TYPE_P (TREE_TYPE (iv
->base
))
3008 && TYPE_PRECISION (TREE_TYPE (iv
->base
)) < TYPE_PRECISION (sizetype
))
3010 tree base
= fold_convert (sizetype
, iv
->base
);
3011 tree step
= fold_convert (sizetype
, iv
->step
);
3013 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3014 add_candidate (data
, base
, step
, true, NULL
, iv
);
3015 /* Add iv cand of the original type only if it has nonlinear use. */
3016 if (iv
->have_use_for
)
3017 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
3020 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
3022 /* The same, but with initial value zero. */
3023 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
3024 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
3026 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
3027 iv
->step
, true, NULL
);
3029 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
3030 if (gimple_code (phi
) == GIMPLE_PHI
)
3032 /* Additionally record the possibility of leaving the original iv
3034 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
3035 /* Don't add candidate if it's from another PHI node because
3036 it's an affine iv appearing in the form of PEELED_CHREC. */
3037 phi
= SSA_NAME_DEF_STMT (def
);
3038 if (gimple_code (phi
) != GIMPLE_PHI
)
3040 cand
= add_candidate_1 (data
,
3041 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
3042 SSA_NAME_DEF_STMT (def
));
3043 cand
->var_before
= iv
->ssa_name
;
3044 cand
->var_after
= def
;
3047 gcc_assert (gimple_bb (phi
) == data
->current_loop
->header
);
3051 /* Adds candidates based on the old induction variables. */
3054 add_iv_candidate_for_bivs (struct ivopts_data
*data
)
3060 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
3062 iv
= ver_info (data
, i
)->iv
;
3063 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
3064 add_iv_candidate_for_biv (data
, iv
);
3068 /* Adds candidates based on the value of USE's iv. */
3071 add_iv_candidate_for_use (struct ivopts_data
*data
, struct iv_use
*use
)
3073 unsigned HOST_WIDE_INT offset
;
3076 struct iv
*iv
= use
->iv
;
3078 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
3080 /* The same, but with initial value zero. Make such variable important,
3081 since it is generic enough so that possibly many uses may be based
3083 basetype
= TREE_TYPE (iv
->base
);
3084 if (POINTER_TYPE_P (basetype
))
3085 basetype
= sizetype
;
3086 add_candidate (data
, build_int_cst (basetype
, 0), iv
->step
, true, use
);
3088 /* Third, try removing the constant offset. Make sure to even
3089 add a candidate for &a[0] vs. (T *)&a. */
3090 base
= strip_offset (iv
->base
, &offset
);
3091 if (offset
|| base
!= iv
->base
)
3092 add_candidate (data
, base
, iv
->step
, false, use
);
3094 /* At last, add auto-incremental candidates. Make such variables
3095 important since other iv uses with same base object may be based
3097 if (use
!= NULL
&& use
->type
== USE_ADDRESS
)
3098 add_autoinc_candidates (data
, iv
->base
, iv
->step
, true, use
);
3101 /* Adds candidates based on the uses. */
3104 add_iv_candidate_for_uses (struct ivopts_data
*data
)
3108 for (i
= 0; i
< n_iv_uses (data
); i
++)
3110 struct iv_use
*use
= iv_use (data
, i
);
3117 case USE_NONLINEAR_EXPR
:
3120 /* Just add the ivs based on the value of the iv used here. */
3121 add_iv_candidate_for_use (data
, use
);
3130 /* Record important candidates and add them to related_cands bitmaps
3134 record_important_candidates (struct ivopts_data
*data
)
3139 for (i
= 0; i
< n_iv_cands (data
); i
++)
3141 struct iv_cand
*cand
= iv_cand (data
, i
);
3143 if (cand
->important
)
3144 bitmap_set_bit (data
->important_candidates
, i
);
3147 data
->consider_all_candidates
= (n_iv_cands (data
)
3148 <= CONSIDER_ALL_CANDIDATES_BOUND
);
3150 if (data
->consider_all_candidates
)
3152 /* We will not need "related_cands" bitmaps in this case,
3153 so release them to decrease peak memory consumption. */
3154 for (i
= 0; i
< n_iv_uses (data
); i
++)
3156 use
= iv_use (data
, i
);
3157 BITMAP_FREE (use
->related_cands
);
3162 /* Add important candidates to the related_cands bitmaps. */
3163 for (i
= 0; i
< n_iv_uses (data
); i
++)
3164 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
3165 data
->important_candidates
);
3169 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3170 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3171 we allocate a simple list to every use. */
3174 alloc_use_cost_map (struct ivopts_data
*data
)
3176 unsigned i
, size
, s
;
3178 for (i
= 0; i
< n_iv_uses (data
); i
++)
3180 struct iv_use
*use
= iv_use (data
, i
);
3182 if (data
->consider_all_candidates
)
3183 size
= n_iv_cands (data
);
3186 s
= bitmap_count_bits (use
->related_cands
);
3188 /* Round up to the power of two, so that moduling by it is fast. */
3189 size
= s
? (1 << ceil_log2 (s
)) : 1;
3192 use
->n_map_members
= size
;
3193 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
3197 /* Returns description of computation cost of expression whose runtime
3198 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
3201 new_cost (unsigned runtime
, unsigned complexity
)
3205 cost
.cost
= runtime
;
3206 cost
.complexity
= complexity
;
3211 /* Returns true if COST is infinite. */
3214 infinite_cost_p (comp_cost cost
)
3216 return cost
.cost
== INFTY
;
3219 /* Adds costs COST1 and COST2. */
3222 add_costs (comp_cost cost1
, comp_cost cost2
)
3224 if (infinite_cost_p (cost1
) || infinite_cost_p (cost2
))
3225 return infinite_cost
;
3227 cost1
.cost
+= cost2
.cost
;
3228 cost1
.complexity
+= cost2
.complexity
;
3232 /* Subtracts costs COST1 and COST2. */
3235 sub_costs (comp_cost cost1
, comp_cost cost2
)
3237 cost1
.cost
-= cost2
.cost
;
3238 cost1
.complexity
-= cost2
.complexity
;
3243 /* Returns a negative number if COST1 < COST2, a positive number if
3244 COST1 > COST2, and 0 if COST1 = COST2. */
3247 compare_costs (comp_cost cost1
, comp_cost cost2
)
3249 if (cost1
.cost
== cost2
.cost
)
3250 return cost1
.complexity
- cost2
.complexity
;
3252 return cost1
.cost
- cost2
.cost
;
3255 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
3256 on invariants DEPENDS_ON and that the value used in expressing it
3257 is VALUE, and in case of iv elimination the comparison operator is COMP. */
3260 set_use_iv_cost (struct ivopts_data
*data
,
3261 struct iv_use
*use
, struct iv_cand
*cand
,
3262 comp_cost cost
, bitmap depends_on
, tree value
,
3263 enum tree_code comp
, int inv_expr_id
)
3267 if (infinite_cost_p (cost
))
3269 BITMAP_FREE (depends_on
);
3273 if (data
->consider_all_candidates
)
3275 use
->cost_map
[cand
->id
].cand
= cand
;
3276 use
->cost_map
[cand
->id
].cost
= cost
;
3277 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
3278 use
->cost_map
[cand
->id
].value
= value
;
3279 use
->cost_map
[cand
->id
].comp
= comp
;
3280 use
->cost_map
[cand
->id
].inv_expr_id
= inv_expr_id
;
3284 /* n_map_members is a power of two, so this computes modulo. */
3285 s
= cand
->id
& (use
->n_map_members
- 1);
3286 for (i
= s
; i
< use
->n_map_members
; i
++)
3287 if (!use
->cost_map
[i
].cand
)
3289 for (i
= 0; i
< s
; i
++)
3290 if (!use
->cost_map
[i
].cand
)
3296 use
->cost_map
[i
].cand
= cand
;
3297 use
->cost_map
[i
].cost
= cost
;
3298 use
->cost_map
[i
].depends_on
= depends_on
;
3299 use
->cost_map
[i
].value
= value
;
3300 use
->cost_map
[i
].comp
= comp
;
3301 use
->cost_map
[i
].inv_expr_id
= inv_expr_id
;
3304 /* Gets cost of (USE, CANDIDATE) pair. */
3306 static struct cost_pair
*
3307 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
3308 struct iv_cand
*cand
)
3311 struct cost_pair
*ret
;
3316 if (data
->consider_all_candidates
)
3318 ret
= use
->cost_map
+ cand
->id
;
3325 /* n_map_members is a power of two, so this computes modulo. */
3326 s
= cand
->id
& (use
->n_map_members
- 1);
3327 for (i
= s
; i
< use
->n_map_members
; i
++)
3328 if (use
->cost_map
[i
].cand
== cand
)
3329 return use
->cost_map
+ i
;
3330 else if (use
->cost_map
[i
].cand
== NULL
)
3332 for (i
= 0; i
< s
; i
++)
3333 if (use
->cost_map
[i
].cand
== cand
)
3334 return use
->cost_map
+ i
;
3335 else if (use
->cost_map
[i
].cand
== NULL
)
3341 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3343 produce_memory_decl_rtl (tree obj
, int *regno
)
3345 addr_space_t as
= TYPE_ADDR_SPACE (TREE_TYPE (obj
));
3346 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3350 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
3352 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
3353 x
= gen_rtx_SYMBOL_REF (address_mode
, name
);
3354 SET_SYMBOL_REF_DECL (x
, obj
);
3355 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
3356 set_mem_addr_space (x
, as
);
3357 targetm
.encode_section_info (obj
, x
, true);
3361 x
= gen_raw_REG (address_mode
, (*regno
)++);
3362 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
3363 set_mem_addr_space (x
, as
);
3369 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3370 walk_tree. DATA contains the actual fake register number. */
3373 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
3375 tree obj
= NULL_TREE
;
3377 int *regno
= (int *) data
;
3379 switch (TREE_CODE (*expr_p
))
3382 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
3383 handled_component_p (*expr_p
);
3384 expr_p
= &TREE_OPERAND (*expr_p
, 0))
3387 if (DECL_P (obj
) && HAS_RTL_P (obj
) && !DECL_RTL_SET_P (obj
))
3388 x
= produce_memory_decl_rtl (obj
, regno
);
3393 obj
= SSA_NAME_VAR (*expr_p
);
3394 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3397 if (!DECL_RTL_SET_P (obj
))
3398 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
3407 if (DECL_RTL_SET_P (obj
))
3410 if (DECL_MODE (obj
) == BLKmode
)
3411 x
= produce_memory_decl_rtl (obj
, regno
);
3413 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
3423 decl_rtl_to_reset
.safe_push (obj
);
3424 SET_DECL_RTL (obj
, x
);
3430 /* Determines cost of the computation of EXPR. */
3433 computation_cost (tree expr
, bool speed
)
3437 tree type
= TREE_TYPE (expr
);
3439 /* Avoid using hard regs in ways which may be unsupported. */
3440 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
3441 struct cgraph_node
*node
= cgraph_node::get (current_function_decl
);
3442 enum node_frequency real_frequency
= node
->frequency
;
3444 node
->frequency
= NODE_FREQUENCY_NORMAL
;
3445 crtl
->maybe_hot_insn_p
= speed
;
3446 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
3448 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
3451 default_rtl_profile ();
3452 node
->frequency
= real_frequency
;
3454 cost
= seq_cost (seq
, speed
);
3456 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
),
3457 TYPE_ADDR_SPACE (type
), speed
);
3458 else if (!REG_P (rslt
))
3459 cost
+= set_src_cost (rslt
, TYPE_MODE (type
), speed
);
3464 /* Returns variable containing the value of candidate CAND at statement AT. */
3467 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, gimple
*stmt
)
3469 if (stmt_after_increment (loop
, cand
, stmt
))
3470 return cand
->var_after
;
3472 return cand
->var_before
;
3475 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3476 same precision that is at least as wide as the precision of TYPE, stores
3477 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3481 determine_common_wider_type (tree
*a
, tree
*b
)
3483 tree wider_type
= NULL
;
3485 tree atype
= TREE_TYPE (*a
);
3487 if (CONVERT_EXPR_P (*a
))
3489 suba
= TREE_OPERAND (*a
, 0);
3490 wider_type
= TREE_TYPE (suba
);
3491 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
3497 if (CONVERT_EXPR_P (*b
))
3499 subb
= TREE_OPERAND (*b
, 0);
3500 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
3511 /* Determines the expression by that USE is expressed from induction variable
3512 CAND at statement AT in LOOP. The expression is stored in a decomposed
3513 form into AFF. Returns false if USE cannot be expressed using CAND. */
3516 get_computation_aff (struct loop
*loop
,
3517 struct iv_use
*use
, struct iv_cand
*cand
, gimple
*at
,
3518 struct aff_tree
*aff
)
3520 tree ubase
= use
->iv
->base
;
3521 tree ustep
= use
->iv
->step
;
3522 tree cbase
= cand
->iv
->base
;
3523 tree cstep
= cand
->iv
->step
, cstep_common
;
3524 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
3525 tree common_type
, var
;
3527 aff_tree cbase_aff
, var_aff
;
3530 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3532 /* We do not have a precision to express the values of use. */
3536 var
= var_at_stmt (loop
, cand
, at
);
3537 uutype
= unsigned_type_for (utype
);
3539 /* If the conversion is not noop, perform it. */
3540 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
3542 if (cand
->orig_iv
!= NULL
&& CONVERT_EXPR_P (cbase
)
3543 && (CONVERT_EXPR_P (cstep
) || TREE_CODE (cstep
) == INTEGER_CST
))
3545 tree inner_base
, inner_step
, inner_type
;
3546 inner_base
= TREE_OPERAND (cbase
, 0);
3547 if (CONVERT_EXPR_P (cstep
))
3548 inner_step
= TREE_OPERAND (cstep
, 0);
3552 inner_type
= TREE_TYPE (inner_base
);
3553 /* If candidate is added from a biv whose type is smaller than
3554 ctype, we know both candidate and the biv won't overflow.
3555 In this case, it's safe to skip the convertion in candidate.
3556 As an example, (unsigned short)((unsigned long)A) equals to
3557 (unsigned short)A, if A has a type no larger than short. */
3558 if (TYPE_PRECISION (inner_type
) <= TYPE_PRECISION (uutype
))
3564 cstep
= fold_convert (uutype
, cstep
);
3565 cbase
= fold_convert (uutype
, cbase
);
3566 var
= fold_convert (uutype
, var
);
3569 if (!constant_multiple_of (ustep
, cstep
, &rat
))
3572 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3573 type, we achieve better folding by computing their difference in this
3574 wider type, and cast the result to UUTYPE. We do not need to worry about
3575 overflows, as all the arithmetics will in the end be performed in UUTYPE
3577 common_type
= determine_common_wider_type (&ubase
, &cbase
);
3579 /* use = ubase - ratio * cbase + ratio * var. */
3580 tree_to_aff_combination (ubase
, common_type
, aff
);
3581 tree_to_aff_combination (cbase
, common_type
, &cbase_aff
);
3582 tree_to_aff_combination (var
, uutype
, &var_aff
);
3584 /* We need to shift the value if we are after the increment. */
3585 if (stmt_after_increment (loop
, cand
, at
))
3589 if (common_type
!= uutype
)
3590 cstep_common
= fold_convert (common_type
, cstep
);
3592 cstep_common
= cstep
;
3594 tree_to_aff_combination (cstep_common
, common_type
, &cstep_aff
);
3595 aff_combination_add (&cbase_aff
, &cstep_aff
);
3598 aff_combination_scale (&cbase_aff
, -rat
);
3599 aff_combination_add (aff
, &cbase_aff
);
3600 if (common_type
!= uutype
)
3601 aff_combination_convert (aff
, uutype
);
3603 aff_combination_scale (&var_aff
, rat
);
3604 aff_combination_add (aff
, &var_aff
);
3609 /* Return the type of USE. */
3612 get_use_type (struct iv_use
*use
)
3614 tree base_type
= TREE_TYPE (use
->iv
->base
);
3617 if (use
->type
== USE_ADDRESS
)
3619 /* The base_type may be a void pointer. Create a pointer type based on
3620 the mem_ref instead. */
3621 type
= build_pointer_type (TREE_TYPE (*use
->op_p
));
3622 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type
))
3623 == TYPE_ADDR_SPACE (TREE_TYPE (base_type
)));
3631 /* Determines the expression by that USE is expressed from induction variable
3632 CAND at statement AT in LOOP. The computation is unshared. */
3635 get_computation_at (struct loop
*loop
,
3636 struct iv_use
*use
, struct iv_cand
*cand
, gimple
*at
)
3639 tree type
= get_use_type (use
);
3641 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
3643 unshare_aff_combination (&aff
);
3644 return fold_convert (type
, aff_combination_to_tree (&aff
));
3647 /* Determines the expression by that USE is expressed from induction variable
3648 CAND in LOOP. The computation is unshared. */
3651 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
3653 return get_computation_at (loop
, use
, cand
, use
->stmt
);
3656 /* Adjust the cost COST for being in loop setup rather than loop body.
3657 If we're optimizing for space, the loop setup overhead is constant;
3658 if we're optimizing for speed, amortize it over the per-iteration cost. */
3660 adjust_setup_cost (struct ivopts_data
*data
, unsigned cost
)
3664 else if (optimize_loop_for_speed_p (data
->current_loop
))
3665 return cost
/ avg_loop_niter (data
->current_loop
);
3670 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3671 validity for a memory reference accessing memory of mode MODE in
3672 address space AS. */
3676 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
, machine_mode mode
,
3679 #define MAX_RATIO 128
3680 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mode
;
3681 static vec
<sbitmap
> valid_mult_list
;
3684 if (data_index
>= valid_mult_list
.length ())
3685 valid_mult_list
.safe_grow_cleared (data_index
+ 1);
3687 valid_mult
= valid_mult_list
[data_index
];
3690 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3691 rtx reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3692 rtx reg2
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3696 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
3697 bitmap_clear (valid_mult
);
3698 scaled
= gen_rtx_fmt_ee (MULT
, address_mode
, reg1
, NULL_RTX
);
3699 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, scaled
, reg2
);
3700 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3702 XEXP (scaled
, 1) = gen_int_mode (i
, address_mode
);
3703 if (memory_address_addr_space_p (mode
, addr
, as
)
3704 || memory_address_addr_space_p (mode
, scaled
, as
))
3705 bitmap_set_bit (valid_mult
, i
+ MAX_RATIO
);
3708 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3710 fprintf (dump_file
, " allowed multipliers:");
3711 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3712 if (bitmap_bit_p (valid_mult
, i
+ MAX_RATIO
))
3713 fprintf (dump_file
, " %d", (int) i
);
3714 fprintf (dump_file
, "\n");
3715 fprintf (dump_file
, "\n");
3718 valid_mult_list
[data_index
] = valid_mult
;
3721 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3724 return bitmap_bit_p (valid_mult
, ratio
+ MAX_RATIO
);
3727 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3728 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3729 variable is omitted. Compute the cost for a memory reference that accesses
3730 a memory location of mode MEM_MODE in address space AS.
3732 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3733 size of MEM_MODE / RATIO) is available. To make this determination, we
3734 look at the size of the increment to be made, which is given in CSTEP.
3735 CSTEP may be zero if the step is unknown.
3736 STMT_AFTER_INC is true iff the statement we're looking at is after the
3737 increment of the original biv.
3739 TODO -- there must be some better way. This all is quite crude. */
3743 AINC_PRE_INC
, /* Pre increment. */
3744 AINC_PRE_DEC
, /* Pre decrement. */
3745 AINC_POST_INC
, /* Post increment. */
3746 AINC_POST_DEC
, /* Post decrement. */
3747 AINC_NONE
/* Also the number of auto increment types. */
3750 typedef struct address_cost_data_s
3752 HOST_WIDE_INT min_offset
, max_offset
;
3753 unsigned costs
[2][2][2][2];
3754 unsigned ainc_costs
[AINC_NONE
];
3755 } *address_cost_data
;
3759 get_address_cost (bool symbol_present
, bool var_present
,
3760 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
,
3761 HOST_WIDE_INT cstep
, machine_mode mem_mode
,
3762 addr_space_t as
, bool speed
,
3763 bool stmt_after_inc
, bool *may_autoinc
)
3765 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3766 static vec
<address_cost_data
> address_cost_data_list
;
3767 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mem_mode
;
3768 address_cost_data data
;
3769 static bool has_preinc
[MAX_MACHINE_MODE
], has_postinc
[MAX_MACHINE_MODE
];
3770 static bool has_predec
[MAX_MACHINE_MODE
], has_postdec
[MAX_MACHINE_MODE
];
3771 unsigned cost
, acost
, complexity
;
3772 enum ainc_type autoinc_type
;
3773 bool offset_p
, ratio_p
, autoinc
;
3774 HOST_WIDE_INT s_offset
, autoinc_offset
, msize
;
3775 unsigned HOST_WIDE_INT mask
;
3778 if (data_index
>= address_cost_data_list
.length ())
3779 address_cost_data_list
.safe_grow_cleared (data_index
+ 1);
3781 data
= address_cost_data_list
[data_index
];
3785 HOST_WIDE_INT rat
, off
= 0;
3786 int old_cse_not_expected
, width
;
3787 unsigned sym_p
, var_p
, off_p
, rat_p
, add_c
;
3792 data
= (address_cost_data
) xcalloc (1, sizeof (*data
));
3794 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3796 width
= GET_MODE_BITSIZE (address_mode
) - 1;
3797 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
3798 width
= HOST_BITS_PER_WIDE_INT
- 1;
3799 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, reg1
, NULL_RTX
);
3801 for (i
= width
; i
>= 0; i
--)
3803 off
= -((unsigned HOST_WIDE_INT
) 1 << i
);
3804 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3805 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3808 data
->min_offset
= (i
== -1? 0 : off
);
3810 for (i
= width
; i
>= 0; i
--)
3812 off
= ((unsigned HOST_WIDE_INT
) 1 << i
) - 1;
3813 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3814 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3816 /* For some strict-alignment targets, the offset must be naturally
3817 aligned. Try an aligned offset if mem_mode is not QImode. */
3818 off
= mem_mode
!= QImode
3819 ? ((unsigned HOST_WIDE_INT
) 1 << i
)
3820 - GET_MODE_SIZE (mem_mode
)
3824 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3825 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3831 data
->max_offset
= off
;
3833 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3835 fprintf (dump_file
, "get_address_cost:\n");
3836 fprintf (dump_file
, " min offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3837 GET_MODE_NAME (mem_mode
),
3839 fprintf (dump_file
, " max offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3840 GET_MODE_NAME (mem_mode
),
3845 for (i
= 2; i
<= MAX_RATIO
; i
++)
3846 if (multiplier_allowed_in_address_p (i
, mem_mode
, as
))
3852 /* Compute the cost of various addressing modes. */
3854 reg0
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3855 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3857 if (USE_LOAD_PRE_DECREMENT (mem_mode
)
3858 || USE_STORE_PRE_DECREMENT (mem_mode
))
3860 addr
= gen_rtx_PRE_DEC (address_mode
, reg0
);
3861 has_predec
[mem_mode
]
3862 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3864 if (has_predec
[mem_mode
])
3865 data
->ainc_costs
[AINC_PRE_DEC
]
3866 = address_cost (addr
, mem_mode
, as
, speed
);
3868 if (USE_LOAD_POST_DECREMENT (mem_mode
)
3869 || USE_STORE_POST_DECREMENT (mem_mode
))
3871 addr
= gen_rtx_POST_DEC (address_mode
, reg0
);
3872 has_postdec
[mem_mode
]
3873 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3875 if (has_postdec
[mem_mode
])
3876 data
->ainc_costs
[AINC_POST_DEC
]
3877 = address_cost (addr
, mem_mode
, as
, speed
);
3879 if (USE_LOAD_PRE_INCREMENT (mem_mode
)
3880 || USE_STORE_PRE_DECREMENT (mem_mode
))
3882 addr
= gen_rtx_PRE_INC (address_mode
, reg0
);
3883 has_preinc
[mem_mode
]
3884 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3886 if (has_preinc
[mem_mode
])
3887 data
->ainc_costs
[AINC_PRE_INC
]
3888 = address_cost (addr
, mem_mode
, as
, speed
);
3890 if (USE_LOAD_POST_INCREMENT (mem_mode
)
3891 || USE_STORE_POST_INCREMENT (mem_mode
))
3893 addr
= gen_rtx_POST_INC (address_mode
, reg0
);
3894 has_postinc
[mem_mode
]
3895 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3897 if (has_postinc
[mem_mode
])
3898 data
->ainc_costs
[AINC_POST_INC
]
3899 = address_cost (addr
, mem_mode
, as
, speed
);
3901 for (i
= 0; i
< 16; i
++)
3904 var_p
= (i
>> 1) & 1;
3905 off_p
= (i
>> 2) & 1;
3906 rat_p
= (i
>> 3) & 1;
3910 addr
= gen_rtx_fmt_ee (MULT
, address_mode
, addr
,
3911 gen_int_mode (rat
, address_mode
));
3914 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, reg1
);
3918 base
= gen_rtx_SYMBOL_REF (address_mode
, ggc_strdup (""));
3919 /* ??? We can run into trouble with some backends by presenting
3920 it with symbols which haven't been properly passed through
3921 targetm.encode_section_info. By setting the local bit, we
3922 enhance the probability of things working. */
3923 SYMBOL_REF_FLAGS (base
) = SYMBOL_FLAG_LOCAL
;
3926 base
= gen_rtx_fmt_e (CONST
, address_mode
,
3928 (PLUS
, address_mode
, base
,
3929 gen_int_mode (off
, address_mode
)));
3932 base
= gen_int_mode (off
, address_mode
);
3937 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, base
);
3940 /* To avoid splitting addressing modes, pretend that no cse will
3942 old_cse_not_expected
= cse_not_expected
;
3943 cse_not_expected
= true;
3944 addr
= memory_address_addr_space (mem_mode
, addr
, as
);
3945 cse_not_expected
= old_cse_not_expected
;
3949 acost
= seq_cost (seq
, speed
);
3950 acost
+= address_cost (addr
, mem_mode
, as
, speed
);
3954 data
->costs
[sym_p
][var_p
][off_p
][rat_p
] = acost
;
3957 /* On some targets, it is quite expensive to load symbol to a register,
3958 which makes addresses that contain symbols look much more expensive.
3959 However, the symbol will have to be loaded in any case before the
3960 loop (and quite likely we have it in register already), so it does not
3961 make much sense to penalize them too heavily. So make some final
3962 tweaks for the SYMBOL_PRESENT modes:
3964 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3965 var is cheaper, use this mode with small penalty.
3966 If VAR_PRESENT is true, try whether the mode with
3967 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3968 if this is the case, use it. */
3969 add_c
= add_cost (speed
, address_mode
);
3970 for (i
= 0; i
< 8; i
++)
3973 off_p
= (i
>> 1) & 1;
3974 rat_p
= (i
>> 2) & 1;
3976 acost
= data
->costs
[0][1][off_p
][rat_p
] + 1;
3980 if (acost
< data
->costs
[1][var_p
][off_p
][rat_p
])
3981 data
->costs
[1][var_p
][off_p
][rat_p
] = acost
;
3984 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3986 fprintf (dump_file
, "Address costs:\n");
3988 for (i
= 0; i
< 16; i
++)
3991 var_p
= (i
>> 1) & 1;
3992 off_p
= (i
>> 2) & 1;
3993 rat_p
= (i
>> 3) & 1;
3995 fprintf (dump_file
, " ");
3997 fprintf (dump_file
, "sym + ");
3999 fprintf (dump_file
, "var + ");
4001 fprintf (dump_file
, "cst + ");
4003 fprintf (dump_file
, "rat * ");
4005 acost
= data
->costs
[sym_p
][var_p
][off_p
][rat_p
];
4006 fprintf (dump_file
, "index costs %d\n", acost
);
4008 if (has_predec
[mem_mode
] || has_postdec
[mem_mode
]
4009 || has_preinc
[mem_mode
] || has_postinc
[mem_mode
])
4010 fprintf (dump_file
, " May include autoinc/dec\n");
4011 fprintf (dump_file
, "\n");
4014 address_cost_data_list
[data_index
] = data
;
4017 bits
= GET_MODE_BITSIZE (address_mode
);
4018 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
4020 if ((offset
>> (bits
- 1) & 1))
4025 autoinc_type
= AINC_NONE
;
4026 msize
= GET_MODE_SIZE (mem_mode
);
4027 autoinc_offset
= offset
;
4029 autoinc_offset
+= ratio
* cstep
;
4030 if (symbol_present
|| var_present
|| ratio
!= 1)
4034 if (has_postinc
[mem_mode
] && autoinc_offset
== 0
4036 autoinc_type
= AINC_POST_INC
;
4037 else if (has_postdec
[mem_mode
] && autoinc_offset
== 0
4039 autoinc_type
= AINC_POST_DEC
;
4040 else if (has_preinc
[mem_mode
] && autoinc_offset
== msize
4042 autoinc_type
= AINC_PRE_INC
;
4043 else if (has_predec
[mem_mode
] && autoinc_offset
== -msize
4045 autoinc_type
= AINC_PRE_DEC
;
4047 if (autoinc_type
!= AINC_NONE
)
4052 offset_p
= (s_offset
!= 0
4053 && data
->min_offset
<= s_offset
4054 && s_offset
<= data
->max_offset
);
4055 ratio_p
= (ratio
!= 1
4056 && multiplier_allowed_in_address_p (ratio
, mem_mode
, as
));
4058 if (ratio
!= 1 && !ratio_p
)
4059 cost
+= mult_by_coeff_cost (ratio
, address_mode
, speed
);
4061 if (s_offset
&& !offset_p
&& !symbol_present
)
4062 cost
+= add_cost (speed
, address_mode
);
4065 *may_autoinc
= autoinc
;
4067 acost
= data
->ainc_costs
[autoinc_type
];
4069 acost
= data
->costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
4070 complexity
= (symbol_present
!= 0) + (var_present
!= 0) + offset_p
+ ratio_p
;
4071 return new_cost (cost
+ acost
, complexity
);
4074 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4075 the EXPR operand holding the shift. COST0 and COST1 are the costs for
4076 calculating the operands of EXPR. Returns true if successful, and returns
4077 the cost in COST. */
4080 get_shiftadd_cost (tree expr
, machine_mode mode
, comp_cost cost0
,
4081 comp_cost cost1
, tree mult
, bool speed
, comp_cost
*cost
)
4084 tree op1
= TREE_OPERAND (expr
, 1);
4085 tree cst
= TREE_OPERAND (mult
, 1);
4086 tree multop
= TREE_OPERAND (mult
, 0);
4087 int m
= exact_log2 (int_cst_value (cst
));
4088 int maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (mode
));
4089 int as_cost
, sa_cost
;
4092 if (!(m
>= 0 && m
< maxm
))
4096 mult_in_op1
= operand_equal_p (op1
, mult
, 0);
4098 as_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
4100 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4101 use that in preference to a shift insn followed by an add insn. */
4102 sa_cost
= (TREE_CODE (expr
) != MINUS_EXPR
4103 ? shiftadd_cost (speed
, mode
, m
)
4105 ? shiftsub1_cost (speed
, mode
, m
)
4106 : shiftsub0_cost (speed
, mode
, m
)));
4108 res
= new_cost (MIN (as_cost
, sa_cost
), 0);
4109 res
= add_costs (res
, mult_in_op1
? cost0
: cost1
);
4111 STRIP_NOPS (multop
);
4112 if (!is_gimple_val (multop
))
4113 res
= add_costs (res
, force_expr_to_var_cost (multop
, speed
));
4119 /* Estimates cost of forcing expression EXPR into a variable. */
4122 force_expr_to_var_cost (tree expr
, bool speed
)
4124 static bool costs_initialized
= false;
4125 static unsigned integer_cost
[2];
4126 static unsigned symbol_cost
[2];
4127 static unsigned address_cost
[2];
4129 comp_cost cost0
, cost1
, cost
;
4132 if (!costs_initialized
)
4134 tree type
= build_pointer_type (integer_type_node
);
4139 var
= create_tmp_var_raw (integer_type_node
, "test_var");
4140 TREE_STATIC (var
) = 1;
4141 x
= produce_memory_decl_rtl (var
, NULL
);
4142 SET_DECL_RTL (var
, x
);
4144 addr
= build1 (ADDR_EXPR
, type
, var
);
4147 for (i
= 0; i
< 2; i
++)
4149 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
4152 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
4155 = computation_cost (fold_build_pointer_plus_hwi (addr
, 2000), i
) + 1;
4156 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4158 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
4159 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
4160 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
4161 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
4162 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
4163 fprintf (dump_file
, "\n");
4167 costs_initialized
= true;
4172 if (SSA_VAR_P (expr
))
4175 if (is_gimple_min_invariant (expr
))
4177 if (TREE_CODE (expr
) == INTEGER_CST
)
4178 return new_cost (integer_cost
[speed
], 0);
4180 if (TREE_CODE (expr
) == ADDR_EXPR
)
4182 tree obj
= TREE_OPERAND (expr
, 0);
4184 if (TREE_CODE (obj
) == VAR_DECL
4185 || TREE_CODE (obj
) == PARM_DECL
4186 || TREE_CODE (obj
) == RESULT_DECL
)
4187 return new_cost (symbol_cost
[speed
], 0);
4190 return new_cost (address_cost
[speed
], 0);
4193 switch (TREE_CODE (expr
))
4195 case POINTER_PLUS_EXPR
:
4199 op0
= TREE_OPERAND (expr
, 0);
4200 op1
= TREE_OPERAND (expr
, 1);
4207 op0
= TREE_OPERAND (expr
, 0);
4213 /* Just an arbitrary value, FIXME. */
4214 return new_cost (target_spill_cost
[speed
], 0);
4217 if (op0
== NULL_TREE
4218 || TREE_CODE (op0
) == SSA_NAME
|| CONSTANT_CLASS_P (op0
))
4221 cost0
= force_expr_to_var_cost (op0
, speed
);
4223 if (op1
== NULL_TREE
4224 || TREE_CODE (op1
) == SSA_NAME
|| CONSTANT_CLASS_P (op1
))
4227 cost1
= force_expr_to_var_cost (op1
, speed
);
4229 mode
= TYPE_MODE (TREE_TYPE (expr
));
4230 switch (TREE_CODE (expr
))
4232 case POINTER_PLUS_EXPR
:
4236 cost
= new_cost (add_cost (speed
, mode
), 0);
4237 if (TREE_CODE (expr
) != NEGATE_EXPR
)
4239 tree mult
= NULL_TREE
;
4241 if (TREE_CODE (op1
) == MULT_EXPR
)
4243 else if (TREE_CODE (op0
) == MULT_EXPR
)
4246 if (mult
!= NULL_TREE
4247 && cst_and_fits_in_hwi (TREE_OPERAND (mult
, 1))
4248 && get_shiftadd_cost (expr
, mode
, cost0
, cost1
, mult
,
4256 tree inner_mode
, outer_mode
;
4257 outer_mode
= TREE_TYPE (expr
);
4258 inner_mode
= TREE_TYPE (op0
);
4259 cost
= new_cost (convert_cost (TYPE_MODE (outer_mode
),
4260 TYPE_MODE (inner_mode
), speed
), 0);
4265 if (cst_and_fits_in_hwi (op0
))
4266 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op0
),
4268 else if (cst_and_fits_in_hwi (op1
))
4269 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op1
),
4272 return new_cost (target_spill_cost
[speed
], 0);
4279 cost
= add_costs (cost
, cost0
);
4280 cost
= add_costs (cost
, cost1
);
4282 /* Bound the cost by target_spill_cost. The parts of complicated
4283 computations often are either loop invariant or at least can
4284 be shared between several iv uses, so letting this grow without
4285 limits would not give reasonable results. */
4286 if (cost
.cost
> (int) target_spill_cost
[speed
])
4287 cost
.cost
= target_spill_cost
[speed
];
4292 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4293 invariants the computation depends on. */
4296 force_var_cost (struct ivopts_data
*data
,
4297 tree expr
, bitmap
*depends_on
)
4301 fd_ivopts_data
= data
;
4302 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
4305 return force_expr_to_var_cost (expr
, data
->speed
);
4308 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4309 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4310 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4311 invariants the computation depends on. */
4314 split_address_cost (struct ivopts_data
*data
,
4315 tree addr
, bool *symbol_present
, bool *var_present
,
4316 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
4319 HOST_WIDE_INT bitsize
;
4320 HOST_WIDE_INT bitpos
;
4323 int unsignedp
, volatilep
;
4325 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
4326 &unsignedp
, &volatilep
, false);
4329 || bitpos
% BITS_PER_UNIT
!= 0
4330 || TREE_CODE (core
) != VAR_DECL
)
4332 *symbol_present
= false;
4333 *var_present
= true;
4334 fd_ivopts_data
= data
;
4335 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
4336 return new_cost (target_spill_cost
[data
->speed
], 0);
4339 *offset
+= bitpos
/ BITS_PER_UNIT
;
4340 if (TREE_STATIC (core
)
4341 || DECL_EXTERNAL (core
))
4343 *symbol_present
= true;
4344 *var_present
= false;
4348 *symbol_present
= false;
4349 *var_present
= true;
4353 /* Estimates cost of expressing difference of addresses E1 - E2 as
4354 var + symbol + offset. The value of offset is added to OFFSET,
4355 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4356 part is missing. DEPENDS_ON is a set of the invariants the computation
4360 ptr_difference_cost (struct ivopts_data
*data
,
4361 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
4362 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
4364 HOST_WIDE_INT diff
= 0;
4365 aff_tree aff_e1
, aff_e2
;
4368 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
4370 if (ptr_difference_const (e1
, e2
, &diff
))
4373 *symbol_present
= false;
4374 *var_present
= false;
4378 if (integer_zerop (e2
))
4379 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
4380 symbol_present
, var_present
, offset
, depends_on
);
4382 *symbol_present
= false;
4383 *var_present
= true;
4385 type
= signed_type_for (TREE_TYPE (e1
));
4386 tree_to_aff_combination (e1
, type
, &aff_e1
);
4387 tree_to_aff_combination (e2
, type
, &aff_e2
);
4388 aff_combination_scale (&aff_e2
, -1);
4389 aff_combination_add (&aff_e1
, &aff_e2
);
4391 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
4394 /* Estimates cost of expressing difference E1 - E2 as
4395 var + symbol + offset. The value of offset is added to OFFSET,
4396 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4397 part is missing. DEPENDS_ON is a set of the invariants the computation
4401 difference_cost (struct ivopts_data
*data
,
4402 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
4403 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
4405 machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
4406 unsigned HOST_WIDE_INT off1
, off2
;
4407 aff_tree aff_e1
, aff_e2
;
4410 e1
= strip_offset (e1
, &off1
);
4411 e2
= strip_offset (e2
, &off2
);
4412 *offset
+= off1
- off2
;
4417 if (TREE_CODE (e1
) == ADDR_EXPR
)
4418 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
,
4419 offset
, depends_on
);
4420 *symbol_present
= false;
4422 if (operand_equal_p (e1
, e2
, 0))
4424 *var_present
= false;
4428 *var_present
= true;
4430 if (integer_zerop (e2
))
4431 return force_var_cost (data
, e1
, depends_on
);
4433 if (integer_zerop (e1
))
4435 comp_cost cost
= force_var_cost (data
, e2
, depends_on
);
4436 cost
.cost
+= mult_by_coeff_cost (-1, mode
, data
->speed
);
4440 type
= signed_type_for (TREE_TYPE (e1
));
4441 tree_to_aff_combination (e1
, type
, &aff_e1
);
4442 tree_to_aff_combination (e2
, type
, &aff_e2
);
4443 aff_combination_scale (&aff_e2
, -1);
4444 aff_combination_add (&aff_e1
, &aff_e2
);
4446 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
4449 /* Returns true if AFF1 and AFF2 are identical. */
4452 compare_aff_trees (aff_tree
*aff1
, aff_tree
*aff2
)
4456 if (aff1
->n
!= aff2
->n
)
4459 for (i
= 0; i
< aff1
->n
; i
++)
4461 if (aff1
->elts
[i
].coef
!= aff2
->elts
[i
].coef
)
4464 if (!operand_equal_p (aff1
->elts
[i
].val
, aff2
->elts
[i
].val
, 0))
4470 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4473 get_expr_id (struct ivopts_data
*data
, tree expr
)
4475 struct iv_inv_expr_ent ent
;
4476 struct iv_inv_expr_ent
**slot
;
4479 ent
.hash
= iterative_hash_expr (expr
, 0);
4480 slot
= data
->inv_expr_tab
->find_slot (&ent
, INSERT
);
4484 *slot
= XNEW (struct iv_inv_expr_ent
);
4485 (*slot
)->expr
= expr
;
4486 (*slot
)->hash
= ent
.hash
;
4487 (*slot
)->id
= data
->inv_expr_id
++;
4491 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4492 requires a new compiler generated temporary. Returns -1 otherwise.
4493 ADDRESS_P is a flag indicating if the expression is for address
4497 get_loop_invariant_expr_id (struct ivopts_data
*data
, tree ubase
,
4498 tree cbase
, HOST_WIDE_INT ratio
,
4501 aff_tree ubase_aff
, cbase_aff
;
4509 if ((TREE_CODE (ubase
) == INTEGER_CST
)
4510 && (TREE_CODE (cbase
) == INTEGER_CST
))
4513 /* Strips the constant part. */
4514 if (TREE_CODE (ubase
) == PLUS_EXPR
4515 || TREE_CODE (ubase
) == MINUS_EXPR
4516 || TREE_CODE (ubase
) == POINTER_PLUS_EXPR
)
4518 if (TREE_CODE (TREE_OPERAND (ubase
, 1)) == INTEGER_CST
)
4519 ubase
= TREE_OPERAND (ubase
, 0);
4522 /* Strips the constant part. */
4523 if (TREE_CODE (cbase
) == PLUS_EXPR
4524 || TREE_CODE (cbase
) == MINUS_EXPR
4525 || TREE_CODE (cbase
) == POINTER_PLUS_EXPR
)
4527 if (TREE_CODE (TREE_OPERAND (cbase
, 1)) == INTEGER_CST
)
4528 cbase
= TREE_OPERAND (cbase
, 0);
4533 if (((TREE_CODE (ubase
) == SSA_NAME
)
4534 || (TREE_CODE (ubase
) == ADDR_EXPR
4535 && is_gimple_min_invariant (ubase
)))
4536 && (TREE_CODE (cbase
) == INTEGER_CST
))
4539 if (((TREE_CODE (cbase
) == SSA_NAME
)
4540 || (TREE_CODE (cbase
) == ADDR_EXPR
4541 && is_gimple_min_invariant (cbase
)))
4542 && (TREE_CODE (ubase
) == INTEGER_CST
))
4548 if (operand_equal_p (ubase
, cbase
, 0))
4551 if (TREE_CODE (ubase
) == ADDR_EXPR
4552 && TREE_CODE (cbase
) == ADDR_EXPR
)
4556 usym
= TREE_OPERAND (ubase
, 0);
4557 csym
= TREE_OPERAND (cbase
, 0);
4558 if (TREE_CODE (usym
) == ARRAY_REF
)
4560 tree ind
= TREE_OPERAND (usym
, 1);
4561 if (TREE_CODE (ind
) == INTEGER_CST
4562 && tree_fits_shwi_p (ind
)
4563 && tree_to_shwi (ind
) == 0)
4564 usym
= TREE_OPERAND (usym
, 0);
4566 if (TREE_CODE (csym
) == ARRAY_REF
)
4568 tree ind
= TREE_OPERAND (csym
, 1);
4569 if (TREE_CODE (ind
) == INTEGER_CST
4570 && tree_fits_shwi_p (ind
)
4571 && tree_to_shwi (ind
) == 0)
4572 csym
= TREE_OPERAND (csym
, 0);
4574 if (operand_equal_p (usym
, csym
, 0))
4577 /* Now do more complex comparison */
4578 tree_to_aff_combination (ubase
, TREE_TYPE (ubase
), &ubase_aff
);
4579 tree_to_aff_combination (cbase
, TREE_TYPE (cbase
), &cbase_aff
);
4580 if (compare_aff_trees (&ubase_aff
, &cbase_aff
))
4584 tree_to_aff_combination (ub
, TREE_TYPE (ub
), &ubase_aff
);
4585 tree_to_aff_combination (cb
, TREE_TYPE (cb
), &cbase_aff
);
4587 aff_combination_scale (&cbase_aff
, -1 * ratio
);
4588 aff_combination_add (&ubase_aff
, &cbase_aff
);
4589 expr
= aff_combination_to_tree (&ubase_aff
);
4590 return get_expr_id (data
, expr
);
4595 /* Determines the cost of the computation by that USE is expressed
4596 from induction variable CAND. If ADDRESS_P is true, we just need
4597 to create an address from it, otherwise we want to get it into
4598 register. A set of invariants we depend on is stored in
4599 DEPENDS_ON. AT is the statement at that the value is computed.
4600 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4601 addressing is likely. */
4604 get_computation_cost_at (struct ivopts_data
*data
,
4605 struct iv_use
*use
, struct iv_cand
*cand
,
4606 bool address_p
, bitmap
*depends_on
, gimple
*at
,
4610 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
4612 tree utype
= TREE_TYPE (ubase
), ctype
;
4613 unsigned HOST_WIDE_INT cstepi
, offset
= 0;
4614 HOST_WIDE_INT ratio
, aratio
;
4615 bool var_present
, symbol_present
, stmt_is_after_inc
;
4618 bool speed
= optimize_bb_for_speed_p (gimple_bb (at
));
4619 machine_mode mem_mode
= (address_p
4620 ? TYPE_MODE (TREE_TYPE (*use
->op_p
))
4625 /* Only consider real candidates. */
4627 return infinite_cost
;
4629 cbase
= cand
->iv
->base
;
4630 cstep
= cand
->iv
->step
;
4631 ctype
= TREE_TYPE (cbase
);
4633 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
4635 /* We do not have a precision to express the values of use. */
4636 return infinite_cost
;
4640 || (use
->iv
->base_object
4641 && cand
->iv
->base_object
4642 && POINTER_TYPE_P (TREE_TYPE (use
->iv
->base_object
))
4643 && POINTER_TYPE_P (TREE_TYPE (cand
->iv
->base_object
))))
4645 /* Do not try to express address of an object with computation based
4646 on address of a different object. This may cause problems in rtl
4647 level alias analysis (that does not expect this to be happening,
4648 as this is illegal in C), and would be unlikely to be useful
4650 if (use
->iv
->base_object
4651 && cand
->iv
->base_object
4652 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
4653 return infinite_cost
;
4656 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
4658 /* TODO -- add direct handling of this case. */
4662 /* CSTEPI is removed from the offset in case statement is after the
4663 increment. If the step is not constant, we use zero instead.
4664 This is a bit imprecise (there is the extra addition), but
4665 redundancy elimination is likely to transform the code so that
4666 it uses value of the variable before increment anyway,
4667 so it is not that much unrealistic. */
4668 if (cst_and_fits_in_hwi (cstep
))
4669 cstepi
= int_cst_value (cstep
);
4673 if (!constant_multiple_of (ustep
, cstep
, &rat
))
4674 return infinite_cost
;
4676 if (wi::fits_shwi_p (rat
))
4677 ratio
= rat
.to_shwi ();
4679 return infinite_cost
;
4682 ctype
= TREE_TYPE (cbase
);
4684 stmt_is_after_inc
= stmt_after_increment (data
->current_loop
, cand
, at
);
4686 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4687 or ratio == 1, it is better to handle this like
4689 ubase - ratio * cbase + ratio * var
4691 (also holds in the case ratio == -1, TODO. */
4693 if (cst_and_fits_in_hwi (cbase
))
4695 offset
= - ratio
* (unsigned HOST_WIDE_INT
) int_cst_value (cbase
);
4696 cost
= difference_cost (data
,
4697 ubase
, build_int_cst (utype
, 0),
4698 &symbol_present
, &var_present
, &offset
,
4700 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4702 else if (ratio
== 1)
4704 tree real_cbase
= cbase
;
4706 /* Check to see if any adjustment is needed. */
4707 if (cstepi
== 0 && stmt_is_after_inc
)
4709 aff_tree real_cbase_aff
;
4712 tree_to_aff_combination (cbase
, TREE_TYPE (real_cbase
),
4714 tree_to_aff_combination (cstep
, TREE_TYPE (cstep
), &cstep_aff
);
4716 aff_combination_add (&real_cbase_aff
, &cstep_aff
);
4717 real_cbase
= aff_combination_to_tree (&real_cbase_aff
);
4720 cost
= difference_cost (data
,
4722 &symbol_present
, &var_present
, &offset
,
4724 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4727 && !POINTER_TYPE_P (ctype
)
4728 && multiplier_allowed_in_address_p
4730 TYPE_ADDR_SPACE (TREE_TYPE (utype
))))
4732 if (cstepi
== 0 && stmt_is_after_inc
)
4734 if (POINTER_TYPE_P (ctype
))
4735 cbase
= fold_build2 (POINTER_PLUS_EXPR
, ctype
, cbase
, cstep
);
4737 cbase
= fold_build2 (PLUS_EXPR
, ctype
, cbase
, cstep
);
4740 = fold_build2 (MULT_EXPR
, ctype
, cbase
, build_int_cst (ctype
, ratio
));
4741 cost
= difference_cost (data
,
4743 &symbol_present
, &var_present
, &offset
,
4745 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4749 cost
= force_var_cost (data
, cbase
, depends_on
);
4750 cost
= add_costs (cost
,
4751 difference_cost (data
,
4752 ubase
, build_int_cst (utype
, 0),
4753 &symbol_present
, &var_present
,
4754 &offset
, depends_on
));
4755 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4756 cost
.cost
+= add_cost (data
->speed
, TYPE_MODE (ctype
));
4759 /* Set of invariants depended on by sub use has already been computed
4760 for the first use in the group. */
4764 if (depends_on
&& *depends_on
)
4765 bitmap_clear (*depends_on
);
4767 else if (inv_expr_id
)
4770 get_loop_invariant_expr_id (data
, ubase
, cbase
, ratio
, address_p
);
4771 /* Clear depends on. */
4772 if (*inv_expr_id
!= -1 && depends_on
&& *depends_on
)
4773 bitmap_clear (*depends_on
);
4776 /* If we are after the increment, the value of the candidate is higher by
4778 if (stmt_is_after_inc
)
4779 offset
-= ratio
* cstepi
;
4781 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4782 (symbol/var1/const parts may be omitted). If we are looking for an
4783 address, find the cost of addressing this. */
4785 return add_costs (cost
,
4786 get_address_cost (symbol_present
, var_present
,
4787 offset
, ratio
, cstepi
,
4789 TYPE_ADDR_SPACE (TREE_TYPE (utype
)),
4790 speed
, stmt_is_after_inc
,
4793 /* Otherwise estimate the costs for computing the expression. */
4794 if (!symbol_present
&& !var_present
&& !offset
)
4797 cost
.cost
+= mult_by_coeff_cost (ratio
, TYPE_MODE (ctype
), speed
);
4801 /* Symbol + offset should be compile-time computable so consider that they
4802 are added once to the variable, if present. */
4803 if (var_present
&& (symbol_present
|| offset
))
4804 cost
.cost
+= adjust_setup_cost (data
,
4805 add_cost (speed
, TYPE_MODE (ctype
)));
4807 /* Having offset does not affect runtime cost in case it is added to
4808 symbol, but it increases complexity. */
4812 cost
.cost
+= add_cost (speed
, TYPE_MODE (ctype
));
4814 aratio
= ratio
> 0 ? ratio
: -ratio
;
4816 cost
.cost
+= mult_by_coeff_cost (aratio
, TYPE_MODE (ctype
), speed
);
4821 *can_autoinc
= false;
4824 /* Just get the expression, expand it and measure the cost. */
4825 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
4828 return infinite_cost
;
4831 comp
= build_simple_mem_ref (comp
);
4833 return new_cost (computation_cost (comp
, speed
), 0);
4837 /* Determines the cost of the computation by that USE is expressed
4838 from induction variable CAND. If ADDRESS_P is true, we just need
4839 to create an address from it, otherwise we want to get it into
4840 register. A set of invariants we depend on is stored in
4841 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4842 autoinc addressing is likely. */
4845 get_computation_cost (struct ivopts_data
*data
,
4846 struct iv_use
*use
, struct iv_cand
*cand
,
4847 bool address_p
, bitmap
*depends_on
,
4848 bool *can_autoinc
, int *inv_expr_id
)
4850 return get_computation_cost_at (data
,
4851 use
, cand
, address_p
, depends_on
, use
->stmt
,
4852 can_autoinc
, inv_expr_id
);
4855 /* Determines cost of basing replacement of USE on CAND in a generic
4859 determine_use_iv_cost_generic (struct ivopts_data
*data
,
4860 struct iv_use
*use
, struct iv_cand
*cand
)
4864 int inv_expr_id
= -1;
4866 /* The simple case first -- if we need to express value of the preserved
4867 original biv, the cost is 0. This also prevents us from counting the
4868 cost of increment twice -- once at this use and once in the cost of
4870 if (cand
->pos
== IP_ORIGINAL
4871 && cand
->incremented_at
== use
->stmt
)
4873 set_use_iv_cost (data
, use
, cand
, no_cost
, NULL
, NULL_TREE
,
4878 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
,
4879 NULL
, &inv_expr_id
);
4881 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4884 return !infinite_cost_p (cost
);
4887 /* Determines cost of basing replacement of USE on CAND in an address. */
4890 determine_use_iv_cost_address (struct ivopts_data
*data
,
4891 struct iv_use
*use
, struct iv_cand
*cand
)
4895 int inv_expr_id
= -1;
4896 struct iv_use
*sub_use
;
4898 comp_cost cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4899 &can_autoinc
, &inv_expr_id
);
4901 if (cand
->ainc_use
== use
)
4904 cost
.cost
-= cand
->cost_step
;
4905 /* If we generated the candidate solely for exploiting autoincrement
4906 opportunities, and it turns out it can't be used, set the cost to
4907 infinity to make sure we ignore it. */
4908 else if (cand
->pos
== IP_AFTER_USE
|| cand
->pos
== IP_BEFORE_USE
)
4909 cost
= infinite_cost
;
4911 for (sub_use
= use
->next
;
4912 sub_use
&& !infinite_cost_p (cost
);
4913 sub_use
= sub_use
->next
)
4915 sub_cost
= get_computation_cost (data
, sub_use
, cand
, true, &depends_on
,
4916 &can_autoinc
, &inv_expr_id
);
4917 cost
= add_costs (cost
, sub_cost
);
4920 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4923 return !infinite_cost_p (cost
);
4926 /* Computes value of candidate CAND at position AT in iteration NITER, and
4927 stores it to VAL. */
4930 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, gimple
*at
, tree niter
,
4933 aff_tree step
, delta
, nit
;
4934 struct iv
*iv
= cand
->iv
;
4935 tree type
= TREE_TYPE (iv
->base
);
4936 tree steptype
= type
;
4937 if (POINTER_TYPE_P (type
))
4938 steptype
= sizetype
;
4939 steptype
= unsigned_type_for (type
);
4941 tree_to_aff_combination (iv
->step
, TREE_TYPE (iv
->step
), &step
);
4942 aff_combination_convert (&step
, steptype
);
4943 tree_to_aff_combination (niter
, TREE_TYPE (niter
), &nit
);
4944 aff_combination_convert (&nit
, steptype
);
4945 aff_combination_mult (&nit
, &step
, &delta
);
4946 if (stmt_after_increment (loop
, cand
, at
))
4947 aff_combination_add (&delta
, &step
);
4949 tree_to_aff_combination (iv
->base
, type
, val
);
4950 if (!POINTER_TYPE_P (type
))
4951 aff_combination_convert (val
, steptype
);
4952 aff_combination_add (val
, &delta
);
4955 /* Returns period of induction variable iv. */
4958 iv_period (struct iv
*iv
)
4960 tree step
= iv
->step
, period
, type
;
4963 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
4965 type
= unsigned_type_for (TREE_TYPE (step
));
4966 /* Period of the iv is lcm (step, type_range)/step -1,
4967 i.e., N*type_range/step - 1. Since type range is power
4968 of two, N == (step >> num_of_ending_zeros_binary (step),
4969 so the final result is
4971 (type_range >> num_of_ending_zeros_binary (step)) - 1
4974 pow2div
= num_ending_zeros (step
);
4976 period
= build_low_bits_mask (type
,
4977 (TYPE_PRECISION (type
)
4978 - tree_to_uhwi (pow2div
)));
4983 /* Returns the comparison operator used when eliminating the iv USE. */
4985 static enum tree_code
4986 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
4988 struct loop
*loop
= data
->current_loop
;
4992 ex_bb
= gimple_bb (use
->stmt
);
4993 exit
= EDGE_SUCC (ex_bb
, 0);
4994 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4995 exit
= EDGE_SUCC (ex_bb
, 1);
4997 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
5000 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5001 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5002 calculation is performed in non-wrapping type.
5004 TODO: More generally, we could test for the situation that
5005 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5006 This would require knowing the sign of OFFSET. */
5009 difference_cannot_overflow_p (struct ivopts_data
*data
, tree base
, tree offset
)
5011 enum tree_code code
;
5013 aff_tree aff_e1
, aff_e2
, aff_offset
;
5015 if (!nowrap_type_p (TREE_TYPE (base
)))
5018 base
= expand_simple_operations (base
);
5020 if (TREE_CODE (base
) == SSA_NAME
)
5022 gimple
*stmt
= SSA_NAME_DEF_STMT (base
);
5024 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
5027 code
= gimple_assign_rhs_code (stmt
);
5028 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
5031 e1
= gimple_assign_rhs1 (stmt
);
5032 e2
= gimple_assign_rhs2 (stmt
);
5036 code
= TREE_CODE (base
);
5037 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
5039 e1
= TREE_OPERAND (base
, 0);
5040 e2
= TREE_OPERAND (base
, 1);
5043 /* Use affine expansion as deeper inspection to prove the equality. */
5044 tree_to_aff_combination_expand (e2
, TREE_TYPE (e2
),
5045 &aff_e2
, &data
->name_expansion_cache
);
5046 tree_to_aff_combination_expand (offset
, TREE_TYPE (offset
),
5047 &aff_offset
, &data
->name_expansion_cache
);
5048 aff_combination_scale (&aff_offset
, -1);
5052 aff_combination_add (&aff_e2
, &aff_offset
);
5053 if (aff_combination_zero_p (&aff_e2
))
5056 tree_to_aff_combination_expand (e1
, TREE_TYPE (e1
),
5057 &aff_e1
, &data
->name_expansion_cache
);
5058 aff_combination_add (&aff_e1
, &aff_offset
);
5059 return aff_combination_zero_p (&aff_e1
);
5061 case POINTER_PLUS_EXPR
:
5062 aff_combination_add (&aff_e2
, &aff_offset
);
5063 return aff_combination_zero_p (&aff_e2
);
5070 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5071 comparison with CAND. NITER describes the number of iterations of
5072 the loops. If successful, the comparison in COMP_P is altered accordingly.
5074 We aim to handle the following situation:
5090 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5091 We aim to optimize this to
5099 while (p < p_0 - a + b);
5101 This preserves the correctness, since the pointer arithmetics does not
5102 overflow. More precisely:
5104 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5105 overflow in computing it or the values of p.
5106 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5107 overflow. To prove this, we use the fact that p_0 = base + a. */
5110 iv_elimination_compare_lt (struct ivopts_data
*data
,
5111 struct iv_cand
*cand
, enum tree_code
*comp_p
,
5112 struct tree_niter_desc
*niter
)
5114 tree cand_type
, a
, b
, mbz
, nit_type
= TREE_TYPE (niter
->niter
), offset
;
5115 struct aff_tree nit
, tmpa
, tmpb
;
5116 enum tree_code comp
;
5119 /* We need to know that the candidate induction variable does not overflow.
5120 While more complex analysis may be used to prove this, for now just
5121 check that the variable appears in the original program and that it
5122 is computed in a type that guarantees no overflows. */
5123 cand_type
= TREE_TYPE (cand
->iv
->base
);
5124 if (cand
->pos
!= IP_ORIGINAL
|| !nowrap_type_p (cand_type
))
5127 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5128 the calculation of the BOUND could overflow, making the comparison
5130 if (!data
->loop_single_exit_p
)
5133 /* We need to be able to decide whether candidate is increasing or decreasing
5134 in order to choose the right comparison operator. */
5135 if (!cst_and_fits_in_hwi (cand
->iv
->step
))
5137 step
= int_cst_value (cand
->iv
->step
);
5139 /* Check that the number of iterations matches the expected pattern:
5140 a + 1 > b ? 0 : b - a - 1. */
5141 mbz
= niter
->may_be_zero
;
5142 if (TREE_CODE (mbz
) == GT_EXPR
)
5144 /* Handle a + 1 > b. */
5145 tree op0
= TREE_OPERAND (mbz
, 0);
5146 if (TREE_CODE (op0
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op0
, 1)))
5148 a
= TREE_OPERAND (op0
, 0);
5149 b
= TREE_OPERAND (mbz
, 1);
5154 else if (TREE_CODE (mbz
) == LT_EXPR
)
5156 tree op1
= TREE_OPERAND (mbz
, 1);
5158 /* Handle b < a + 1. */
5159 if (TREE_CODE (op1
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op1
, 1)))
5161 a
= TREE_OPERAND (op1
, 0);
5162 b
= TREE_OPERAND (mbz
, 0);
5170 /* Expected number of iterations is B - A - 1. Check that it matches
5171 the actual number, i.e., that B - A - NITER = 1. */
5172 tree_to_aff_combination (niter
->niter
, nit_type
, &nit
);
5173 tree_to_aff_combination (fold_convert (nit_type
, a
), nit_type
, &tmpa
);
5174 tree_to_aff_combination (fold_convert (nit_type
, b
), nit_type
, &tmpb
);
5175 aff_combination_scale (&nit
, -1);
5176 aff_combination_scale (&tmpa
, -1);
5177 aff_combination_add (&tmpb
, &tmpa
);
5178 aff_combination_add (&tmpb
, &nit
);
5179 if (tmpb
.n
!= 0 || tmpb
.offset
!= 1)
5182 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5184 offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (cand
->iv
->step
),
5186 fold_convert (TREE_TYPE (cand
->iv
->step
), a
));
5187 if (!difference_cannot_overflow_p (data
, cand
->iv
->base
, offset
))
5190 /* Determine the new comparison operator. */
5191 comp
= step
< 0 ? GT_EXPR
: LT_EXPR
;
5192 if (*comp_p
== NE_EXPR
)
5194 else if (*comp_p
== EQ_EXPR
)
5195 *comp_p
= invert_tree_comparison (comp
, false);
5202 /* Check whether it is possible to express the condition in USE by comparison
5203 of candidate CAND. If so, store the value compared with to BOUND, and the
5204 comparison operator to COMP. */
5207 may_eliminate_iv (struct ivopts_data
*data
,
5208 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
,
5209 enum tree_code
*comp
)
5214 struct loop
*loop
= data
->current_loop
;
5216 struct tree_niter_desc
*desc
= NULL
;
5218 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
5221 /* For now works only for exits that dominate the loop latch.
5222 TODO: extend to other conditions inside loop body. */
5223 ex_bb
= gimple_bb (use
->stmt
);
5224 if (use
->stmt
!= last_stmt (ex_bb
)
5225 || gimple_code (use
->stmt
) != GIMPLE_COND
5226 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
5229 exit
= EDGE_SUCC (ex_bb
, 0);
5230 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
5231 exit
= EDGE_SUCC (ex_bb
, 1);
5232 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
5235 desc
= niter_for_exit (data
, exit
);
5239 /* Determine whether we can use the variable to test the exit condition.
5240 This is the case iff the period of the induction variable is greater
5241 than the number of iterations for which the exit condition is true. */
5242 period
= iv_period (cand
->iv
);
5244 /* If the number of iterations is constant, compare against it directly. */
5245 if (TREE_CODE (desc
->niter
) == INTEGER_CST
)
5247 /* See cand_value_at. */
5248 if (stmt_after_increment (loop
, cand
, use
->stmt
))
5250 if (!tree_int_cst_lt (desc
->niter
, period
))
5255 if (tree_int_cst_lt (period
, desc
->niter
))
5260 /* If not, and if this is the only possible exit of the loop, see whether
5261 we can get a conservative estimate on the number of iterations of the
5262 entire loop and compare against that instead. */
5265 widest_int period_value
, max_niter
;
5267 max_niter
= desc
->max
;
5268 if (stmt_after_increment (loop
, cand
, use
->stmt
))
5270 period_value
= wi::to_widest (period
);
5271 if (wi::gtu_p (max_niter
, period_value
))
5273 /* See if we can take advantage of inferred loop bound information. */
5274 if (data
->loop_single_exit_p
)
5276 if (!max_loop_iterations (loop
, &max_niter
))
5278 /* The loop bound is already adjusted by adding 1. */
5279 if (wi::gtu_p (max_niter
, period_value
))
5287 cand_value_at (loop
, cand
, use
->stmt
, desc
->niter
, &bnd
);
5289 *bound
= fold_convert (TREE_TYPE (cand
->iv
->base
),
5290 aff_combination_to_tree (&bnd
));
5291 *comp
= iv_elimination_compare (data
, use
);
5293 /* It is unlikely that computing the number of iterations using division
5294 would be more profitable than keeping the original induction variable. */
5295 if (expression_expensive_p (*bound
))
5298 /* Sometimes, it is possible to handle the situation that the number of
5299 iterations may be zero unless additional assumtions by using <
5300 instead of != in the exit condition.
5302 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5303 base the exit condition on it. However, that is often too
5305 if (!integer_zerop (desc
->may_be_zero
))
5306 return iv_elimination_compare_lt (data
, cand
, comp
, desc
);
5311 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5312 be copied, if it is used in the loop body and DATA->body_includes_call. */
5315 parm_decl_cost (struct ivopts_data
*data
, tree bound
)
5317 tree sbound
= bound
;
5318 STRIP_NOPS (sbound
);
5320 if (TREE_CODE (sbound
) == SSA_NAME
5321 && SSA_NAME_IS_DEFAULT_DEF (sbound
)
5322 && TREE_CODE (SSA_NAME_VAR (sbound
)) == PARM_DECL
5323 && data
->body_includes_call
)
5324 return COSTS_N_INSNS (1);
5329 /* Determines cost of basing replacement of USE on CAND in a condition. */
5332 determine_use_iv_cost_condition (struct ivopts_data
*data
,
5333 struct iv_use
*use
, struct iv_cand
*cand
)
5335 tree bound
= NULL_TREE
;
5337 bitmap depends_on_elim
= NULL
, depends_on_express
= NULL
, depends_on
;
5338 comp_cost elim_cost
, express_cost
, cost
, bound_cost
;
5340 int elim_inv_expr_id
= -1, express_inv_expr_id
= -1, inv_expr_id
;
5341 tree
*control_var
, *bound_cst
;
5342 enum tree_code comp
= ERROR_MARK
;
5344 /* Only consider real candidates. */
5347 set_use_iv_cost (data
, use
, cand
, infinite_cost
, NULL
, NULL_TREE
,
5352 /* Try iv elimination. */
5353 if (may_eliminate_iv (data
, use
, cand
, &bound
, &comp
))
5355 elim_cost
= force_var_cost (data
, bound
, &depends_on_elim
);
5356 if (elim_cost
.cost
== 0)
5357 elim_cost
.cost
= parm_decl_cost (data
, bound
);
5358 else if (TREE_CODE (bound
) == INTEGER_CST
)
5360 /* If we replace a loop condition 'i < n' with 'p < base + n',
5361 depends_on_elim will have 'base' and 'n' set, which implies
5362 that both 'base' and 'n' will be live during the loop. More likely,
5363 'base + n' will be loop invariant, resulting in only one live value
5364 during the loop. So in that case we clear depends_on_elim and set
5365 elim_inv_expr_id instead. */
5366 if (depends_on_elim
&& bitmap_count_bits (depends_on_elim
) > 1)
5368 elim_inv_expr_id
= get_expr_id (data
, bound
);
5369 bitmap_clear (depends_on_elim
);
5371 /* The bound is a loop invariant, so it will be only computed
5373 elim_cost
.cost
= adjust_setup_cost (data
, elim_cost
.cost
);
5376 elim_cost
= infinite_cost
;
5378 /* Try expressing the original giv. If it is compared with an invariant,
5379 note that we cannot get rid of it. */
5380 ok
= extract_cond_operands (data
, use
->stmt
, &control_var
, &bound_cst
,
5384 /* When the condition is a comparison of the candidate IV against
5385 zero, prefer this IV.
5387 TODO: The constant that we're subtracting from the cost should
5388 be target-dependent. This information should be added to the
5389 target costs for each backend. */
5390 if (!infinite_cost_p (elim_cost
) /* Do not try to decrease infinite! */
5391 && integer_zerop (*bound_cst
)
5392 && (operand_equal_p (*control_var
, cand
->var_after
, 0)
5393 || operand_equal_p (*control_var
, cand
->var_before
, 0)))
5394 elim_cost
.cost
-= 1;
5396 express_cost
= get_computation_cost (data
, use
, cand
, false,
5397 &depends_on_express
, NULL
,
5398 &express_inv_expr_id
);
5399 fd_ivopts_data
= data
;
5400 walk_tree (&cmp_iv
->base
, find_depends
, &depends_on_express
, NULL
);
5402 /* Count the cost of the original bound as well. */
5403 bound_cost
= force_var_cost (data
, *bound_cst
, NULL
);
5404 if (bound_cost
.cost
== 0)
5405 bound_cost
.cost
= parm_decl_cost (data
, *bound_cst
);
5406 else if (TREE_CODE (*bound_cst
) == INTEGER_CST
)
5407 bound_cost
.cost
= 0;
5408 express_cost
.cost
+= bound_cost
.cost
;
5410 /* Choose the better approach, preferring the eliminated IV. */
5411 if (compare_costs (elim_cost
, express_cost
) <= 0)
5414 depends_on
= depends_on_elim
;
5415 depends_on_elim
= NULL
;
5416 inv_expr_id
= elim_inv_expr_id
;
5420 cost
= express_cost
;
5421 depends_on
= depends_on_express
;
5422 depends_on_express
= NULL
;
5425 inv_expr_id
= express_inv_expr_id
;
5428 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
, comp
, inv_expr_id
);
5430 if (depends_on_elim
)
5431 BITMAP_FREE (depends_on_elim
);
5432 if (depends_on_express
)
5433 BITMAP_FREE (depends_on_express
);
5435 return !infinite_cost_p (cost
);
5438 /* Determines cost of basing replacement of USE on CAND. Returns false
5439 if USE cannot be based on CAND. */
5442 determine_use_iv_cost (struct ivopts_data
*data
,
5443 struct iv_use
*use
, struct iv_cand
*cand
)
5447 case USE_NONLINEAR_EXPR
:
5448 return determine_use_iv_cost_generic (data
, use
, cand
);
5451 return determine_use_iv_cost_address (data
, use
, cand
);
5454 return determine_use_iv_cost_condition (data
, use
, cand
);
5461 /* Return true if get_computation_cost indicates that autoincrement is
5462 a possibility for the pair of USE and CAND, false otherwise. */
5465 autoinc_possible_for_pair (struct ivopts_data
*data
, struct iv_use
*use
,
5466 struct iv_cand
*cand
)
5472 if (use
->type
!= USE_ADDRESS
)
5475 cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
5476 &can_autoinc
, NULL
);
5478 BITMAP_FREE (depends_on
);
5480 return !infinite_cost_p (cost
) && can_autoinc
;
5483 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5484 use that allows autoincrement, and set their AINC_USE if possible. */
5487 set_autoinc_for_original_candidates (struct ivopts_data
*data
)
5491 for (i
= 0; i
< n_iv_cands (data
); i
++)
5493 struct iv_cand
*cand
= iv_cand (data
, i
);
5494 struct iv_use
*closest_before
= NULL
;
5495 struct iv_use
*closest_after
= NULL
;
5496 if (cand
->pos
!= IP_ORIGINAL
)
5499 for (j
= 0; j
< n_iv_uses (data
); j
++)
5501 struct iv_use
*use
= iv_use (data
, j
);
5502 unsigned uid
= gimple_uid (use
->stmt
);
5504 if (gimple_bb (use
->stmt
) != gimple_bb (cand
->incremented_at
))
5507 if (uid
< gimple_uid (cand
->incremented_at
)
5508 && (closest_before
== NULL
5509 || uid
> gimple_uid (closest_before
->stmt
)))
5510 closest_before
= use
;
5512 if (uid
> gimple_uid (cand
->incremented_at
)
5513 && (closest_after
== NULL
5514 || uid
< gimple_uid (closest_after
->stmt
)))
5515 closest_after
= use
;
5518 if (closest_before
!= NULL
5519 && autoinc_possible_for_pair (data
, closest_before
, cand
))
5520 cand
->ainc_use
= closest_before
;
5521 else if (closest_after
!= NULL
5522 && autoinc_possible_for_pair (data
, closest_after
, cand
))
5523 cand
->ainc_use
= closest_after
;
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_iv_candidate_for_bivs (data);

  /* Add induction variables derived from uses.  */
  add_iv_candidate_for_uses (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
5547 /* Determines costs of basing the use of the iv on an iv candidate. */
5550 determine_use_iv_costs (struct ivopts_data
*data
)
5554 struct iv_cand
*cand
;
5555 bitmap to_clear
= BITMAP_ALLOC (NULL
);
5557 alloc_use_cost_map (data
);
5559 for (i
= 0; i
< n_iv_uses (data
); i
++)
5561 use
= iv_use (data
, i
);
5563 if (data
->consider_all_candidates
)
5565 for (j
= 0; j
< n_iv_cands (data
); j
++)
5567 cand
= iv_cand (data
, j
);
5568 determine_use_iv_cost (data
, use
, cand
);
5575 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
5577 cand
= iv_cand (data
, j
);
5578 if (!determine_use_iv_cost (data
, use
, cand
))
5579 bitmap_set_bit (to_clear
, j
);
5582 /* Remove the candidates for that the cost is infinite from
5583 the list of related candidates. */
5584 bitmap_and_compl_into (use
->related_cands
, to_clear
);
5585 bitmap_clear (to_clear
);
5589 BITMAP_FREE (to_clear
);
5591 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5593 fprintf (dump_file
, "Use-candidate costs:\n");
5595 for (i
= 0; i
< n_iv_uses (data
); i
++)
5597 use
= iv_use (data
, i
);
5599 fprintf (dump_file
, "Use %d:\n", i
);
5600 fprintf (dump_file
, " cand\tcost\tcompl.\tdepends on\n");
5601 for (j
= 0; j
< use
->n_map_members
; j
++)
5603 if (!use
->cost_map
[j
].cand
5604 || infinite_cost_p (use
->cost_map
[j
].cost
))
5607 fprintf (dump_file
, " %d\t%d\t%d\t",
5608 use
->cost_map
[j
].cand
->id
,
5609 use
->cost_map
[j
].cost
.cost
,
5610 use
->cost_map
[j
].cost
.complexity
);
5611 if (use
->cost_map
[j
].depends_on
)
5612 bitmap_print (dump_file
,
5613 use
->cost_map
[j
].depends_on
, "","");
5614 if (use
->cost_map
[j
].inv_expr_id
!= -1)
5615 fprintf (dump_file
, " inv_expr:%d", use
->cost_map
[j
].inv_expr_id
);
5616 fprintf (dump_file
, "\n");
5619 fprintf (dump_file
, "\n");
5621 fprintf (dump_file
, "\n");
5625 /* Determines cost of the candidate CAND. */
5628 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
5630 comp_cost cost_base
;
5631 unsigned cost
, cost_step
;
5640 /* There are two costs associated with the candidate -- its increment
5641 and its initialization. The second is almost negligible for any loop
5642 that rolls enough, so we take it just very little into account. */
5644 base
= cand
->iv
->base
;
5645 cost_base
= force_var_cost (data
, base
, NULL
);
5646 /* It will be exceptional that the iv register happens to be initialized with
5647 the proper value at no cost. In general, there will at least be a regcopy
5649 if (cost_base
.cost
== 0)
5650 cost_base
.cost
= COSTS_N_INSNS (1);
5651 cost_step
= add_cost (data
->speed
, TYPE_MODE (TREE_TYPE (base
)));
5653 cost
= cost_step
+ adjust_setup_cost (data
, cost_base
.cost
);
5655 /* Prefer the original ivs unless we may gain something by replacing it.
5656 The reason is to make debugging simpler; so this is not relevant for
5657 artificial ivs created by other optimization passes. */
5658 if (cand
->pos
!= IP_ORIGINAL
5659 || !SSA_NAME_VAR (cand
->var_before
)
5660 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
5663 /* Prefer not to insert statements into latch unless there are some
5664 already (so that we do not create unnecessary jumps). */
5665 if (cand
->pos
== IP_END
5666 && empty_block_p (ip_end_pos (data
->current_loop
)))
5670 cand
->cost_step
= cost_step
;
5673 /* Determines costs of computation of the candidates. */
5676 determine_iv_costs (struct ivopts_data
*data
)
5680 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5682 fprintf (dump_file
, "Candidate costs:\n");
5683 fprintf (dump_file
, " cand\tcost\n");
5686 for (i
= 0; i
< n_iv_cands (data
); i
++)
5688 struct iv_cand
*cand
= iv_cand (data
, i
);
5690 determine_iv_cost (data
, cand
);
5692 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5693 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
5696 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5697 fprintf (dump_file
, "\n");
5700 /* Calculates cost for having SIZE induction variables. */
5703 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
5705 /* We add size to the cost, so that we prefer eliminating ivs
5707 return size
+ estimate_reg_pressure_cost (size
, data
->regs_used
, data
->speed
,
5708 data
->body_includes_call
);
5711 /* For each size of the induction variable set determine the penalty. */
5714 determine_set_costs (struct ivopts_data
*data
)
5720 struct loop
*loop
= data
->current_loop
;
5723 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5725 fprintf (dump_file
, "Global costs:\n");
5726 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
5727 fprintf (dump_file
, " target_clobbered_regs %d\n", target_clobbered_regs
);
5728 fprintf (dump_file
, " target_reg_cost %d\n", target_reg_cost
[data
->speed
]);
5729 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
[data
->speed
]);
5733 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
5736 op
= PHI_RESULT (phi
);
5738 if (virtual_operand_p (op
))
5741 if (get_iv (data
, op
))
5747 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5749 struct version_info
*info
= ver_info (data
, j
);
5751 if (info
->inv_id
&& info
->has_nonlin_use
)
5755 data
->regs_used
= n
;
5756 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5757 fprintf (dump_file
, " regs_used %d\n", n
);
5759 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5761 fprintf (dump_file
, " cost for size:\n");
5762 fprintf (dump_file
, " ivs\tcost\n");
5763 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
5764 fprintf (dump_file
, " %d\t%d\n", j
,
5765 ivopts_global_cost_for_size (data
, j
));
5766 fprintf (dump_file
, "\n");
5770 /* Returns true if A is a cheaper cost pair than B. */
5773 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
5783 cmp
= compare_costs (a
->cost
, b
->cost
);
5790 /* In case the costs are the same, prefer the cheaper candidate. */
5791 if (a
->cand
->cost
< b
->cand
->cost
)
5798 /* Returns candidate by that USE is expressed in IVS. */
5800 static struct cost_pair
*
5801 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
5803 return ivs
->cand_for_use
[use
->id
];
5806 /* Computes the cost field of IVS structure. */
5809 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5811 comp_cost cost
= ivs
->cand_use_cost
;
5813 cost
.cost
+= ivs
->cand_cost
;
5815 cost
.cost
+= ivopts_global_cost_for_size (data
,
5816 ivs
->n_regs
+ ivs
->num_used_inv_expr
);
5821 /* Remove invariants in set INVS to set IVS. */
5824 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
5832 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5834 ivs
->n_invariant_uses
[iid
]--;
5835 if (ivs
->n_invariant_uses
[iid
] == 0)
5840 /* Set USE not to be expressed by any candidate in IVS. */
5843 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5846 unsigned uid
= use
->id
, cid
;
5847 struct cost_pair
*cp
;
5849 cp
= ivs
->cand_for_use
[uid
];
5855 ivs
->cand_for_use
[uid
] = NULL
;
5856 ivs
->n_cand_uses
[cid
]--;
5858 if (ivs
->n_cand_uses
[cid
] == 0)
5860 bitmap_clear_bit (ivs
->cands
, cid
);
5861 /* Do not count the pseudocandidates. */
5865 ivs
->cand_cost
-= cp
->cand
->cost
;
5867 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
5870 ivs
->cand_use_cost
= sub_costs (ivs
->cand_use_cost
, cp
->cost
);
5872 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
5874 if (cp
->inv_expr_id
!= -1)
5876 ivs
->used_inv_expr
[cp
->inv_expr_id
]--;
5877 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 0)
5878 ivs
->num_used_inv_expr
--;
5880 iv_ca_recount_cost (data
, ivs
);
5883 /* Add invariants in set INVS to set IVS. */
5886 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
5894 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5896 ivs
->n_invariant_uses
[iid
]++;
5897 if (ivs
->n_invariant_uses
[iid
] == 1)
5902 /* Set cost pair for USE in set IVS to CP. */
5905 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5906 struct iv_use
*use
, struct cost_pair
*cp
)
5908 unsigned uid
= use
->id
, cid
;
5910 if (ivs
->cand_for_use
[uid
] == cp
)
5913 if (ivs
->cand_for_use
[uid
])
5914 iv_ca_set_no_cp (data
, ivs
, use
);
5921 ivs
->cand_for_use
[uid
] = cp
;
5922 ivs
->n_cand_uses
[cid
]++;
5923 if (ivs
->n_cand_uses
[cid
] == 1)
5925 bitmap_set_bit (ivs
->cands
, cid
);
5926 /* Do not count the pseudocandidates. */
5930 ivs
->cand_cost
+= cp
->cand
->cost
;
5932 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
5935 ivs
->cand_use_cost
= add_costs (ivs
->cand_use_cost
, cp
->cost
);
5936 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
5938 if (cp
->inv_expr_id
!= -1)
5940 ivs
->used_inv_expr
[cp
->inv_expr_id
]++;
5941 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 1)
5942 ivs
->num_used_inv_expr
++;
5944 iv_ca_recount_cost (data
, ivs
);
5948 /* Extend set IVS by expressing USE by some of the candidates in it
5949 if possible. Consider all important candidates if candidates in
5950 set IVS don't give any result. */
5953 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5956 struct cost_pair
*best_cp
= NULL
, *cp
;
5959 struct iv_cand
*cand
;
5961 gcc_assert (ivs
->upto
>= use
->id
);
5965 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5967 cand
= iv_cand (data
, i
);
5968 cp
= get_use_iv_cost (data
, use
, cand
);
5969 if (cheaper_cost_pair (cp
, best_cp
))
5973 if (best_cp
== NULL
)
5975 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
5977 cand
= iv_cand (data
, i
);
5978 cp
= get_use_iv_cost (data
, use
, cand
);
5979 if (cheaper_cost_pair (cp
, best_cp
))
5984 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
5987 /* Get cost for assignment IVS. */
5990 iv_ca_cost (struct iv_ca
*ivs
)
5992 /* This was a conditional expression but it triggered a bug in
5995 return infinite_cost
;
6000 /* Returns true if all dependences of CP are among invariants in IVS. */
6003 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
6008 if (!cp
->depends_on
)
6011 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
6013 if (ivs
->n_invariant_uses
[i
] == 0)
6020 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
6021 it before NEXT_CHANGE. */
6023 static struct iv_ca_delta
*
6024 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
6025 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
6027 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
6030 change
->old_cp
= old_cp
;
6031 change
->new_cp
= new_cp
;
6032 change
->next_change
= next_change
;
6037 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6040 static struct iv_ca_delta
*
6041 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
6043 struct iv_ca_delta
*last
;
6051 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
6053 last
->next_change
= l2
;
6058 /* Reverse the list of changes DELTA, forming the inverse to it. */
6060 static struct iv_ca_delta
*
6061 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
6063 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
6065 for (act
= delta
; act
; act
= next
)
6067 next
= act
->next_change
;
6068 act
->next_change
= prev
;
6071 std::swap (act
->old_cp
, act
->new_cp
);
6077 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6078 reverted instead. */
6081 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6082 struct iv_ca_delta
*delta
, bool forward
)
6084 struct cost_pair
*from
, *to
;
6085 struct iv_ca_delta
*act
;
6088 delta
= iv_ca_delta_reverse (delta
);
6090 for (act
= delta
; act
; act
= act
->next_change
)
6094 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
6095 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
6099 iv_ca_delta_reverse (delta
);
6102 /* Returns true if CAND is used in IVS. */
6105 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
6107 return ivs
->n_cand_uses
[cand
->id
] > 0;
6110 /* Returns number of induction variable candidates in the set IVS. */
6113 iv_ca_n_cands (struct iv_ca
*ivs
)
6115 return ivs
->n_cands
;
6118 /* Free the list of changes DELTA. */
6121 iv_ca_delta_free (struct iv_ca_delta
**delta
)
6123 struct iv_ca_delta
*act
, *next
;
6125 for (act
= *delta
; act
; act
= next
)
6127 next
= act
->next_change
;
6134 /* Allocates new iv candidates assignment. */
6136 static struct iv_ca
*
6137 iv_ca_new (struct ivopts_data
*data
)
6139 struct iv_ca
*nw
= XNEW (struct iv_ca
);
6143 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
6144 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
6145 nw
->cands
= BITMAP_ALLOC (NULL
);
6148 nw
->cand_use_cost
= no_cost
;
6150 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
6152 nw
->used_inv_expr
= XCNEWVEC (unsigned, data
->inv_expr_id
+ 1);
6153 nw
->num_used_inv_expr
= 0;
6158 /* Free memory occupied by the set IVS. */
6161 iv_ca_free (struct iv_ca
**ivs
)
6163 free ((*ivs
)->cand_for_use
);
6164 free ((*ivs
)->n_cand_uses
);
6165 BITMAP_FREE ((*ivs
)->cands
);
6166 free ((*ivs
)->n_invariant_uses
);
6167 free ((*ivs
)->used_inv_expr
);
6172 /* Dumps IVS to FILE. */
6175 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
6177 const char *pref
= " invariants ";
6179 comp_cost cost
= iv_ca_cost (ivs
);
6181 fprintf (file
, " cost: %d (complexity %d)\n", cost
.cost
, cost
.complexity
);
6182 fprintf (file
, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
6183 ivs
->cand_cost
, ivs
->cand_use_cost
.cost
, ivs
->cand_use_cost
.complexity
);
6184 bitmap_print (file
, ivs
->cands
, " candidates: ","\n");
6186 for (i
= 0; i
< ivs
->upto
; i
++)
6188 struct iv_use
*use
= iv_use (data
, i
);
6189 struct cost_pair
*cp
= iv_ca_cand_for_use (ivs
, use
);
6191 fprintf (file
, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6192 use
->id
, cp
->cand
->id
, cp
->cost
.cost
, cp
->cost
.complexity
);
6194 fprintf (file
, " use:%d --> ??\n", use
->id
);
6197 for (i
= 1; i
<= data
->max_inv_id
; i
++)
6198 if (ivs
->n_invariant_uses
[i
])
6200 fprintf (file
, "%s%d", pref
, i
);
6203 fprintf (file
, "\n\n");
6206 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6207 new set, and store differences in DELTA. Number of induction variables
6208 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6209 the function will try to find a solution with mimimal iv candidates. */
6212 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6213 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
6214 unsigned *n_ivs
, bool min_ncand
)
6219 struct cost_pair
*old_cp
, *new_cp
;
6222 for (i
= 0; i
< ivs
->upto
; i
++)
6224 use
= iv_use (data
, i
);
6225 old_cp
= iv_ca_cand_for_use (ivs
, use
);
6228 && old_cp
->cand
== cand
)
6231 new_cp
= get_use_iv_cost (data
, use
, cand
);
6235 if (!min_ncand
&& !iv_ca_has_deps (ivs
, new_cp
))
6238 if (!min_ncand
&& !cheaper_cost_pair (new_cp
, old_cp
))
6241 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
6244 iv_ca_delta_commit (data
, ivs
, *delta
, true);
6245 cost
= iv_ca_cost (ivs
);
6247 *n_ivs
= iv_ca_n_cands (ivs
);
6248 iv_ca_delta_commit (data
, ivs
, *delta
, false);
6253 /* Try narrowing set IVS by removing CAND. Return the cost of
6254 the new set and store the differences in DELTA. START is
6255 the candidate with which we start narrowing. */
6258 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6259 struct iv_cand
*cand
, struct iv_cand
*start
,
6260 struct iv_ca_delta
**delta
)
6264 struct cost_pair
*old_cp
, *new_cp
, *cp
;
6266 struct iv_cand
*cnd
;
6267 comp_cost cost
, best_cost
, acost
;
6270 for (i
= 0; i
< n_iv_uses (data
); i
++)
6272 use
= iv_use (data
, i
);
6274 old_cp
= iv_ca_cand_for_use (ivs
, use
);
6275 if (old_cp
->cand
!= cand
)
6278 best_cost
= iv_ca_cost (ivs
);
6279 /* Start narrowing with START. */
6280 new_cp
= get_use_iv_cost (data
, use
, start
);
6282 if (data
->consider_all_candidates
)
6284 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
6286 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
6289 cnd
= iv_cand (data
, ci
);
6291 cp
= get_use_iv_cost (data
, use
, cnd
);
6295 iv_ca_set_cp (data
, ivs
, use
, cp
);
6296 acost
= iv_ca_cost (ivs
);
6298 if (compare_costs (acost
, best_cost
) < 0)
6307 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
6309 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
6312 cnd
= iv_cand (data
, ci
);
6314 cp
= get_use_iv_cost (data
, use
, cnd
);
6318 iv_ca_set_cp (data
, ivs
, use
, cp
);
6319 acost
= iv_ca_cost (ivs
);
6321 if (compare_costs (acost
, best_cost
) < 0)
6328 /* Restore to old cp for use. */
6329 iv_ca_set_cp (data
, ivs
, use
, old_cp
);
6333 iv_ca_delta_free (delta
);
6334 return infinite_cost
;
6337 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
6340 iv_ca_delta_commit (data
, ivs
, *delta
, true);
6341 cost
= iv_ca_cost (ivs
);
6342 iv_ca_delta_commit (data
, ivs
, *delta
, false);
6347 /* Try optimizing the set of candidates IVS by removing candidates different
6348 from to EXCEPT_CAND from it. Return cost of the new set, and store
6349 differences in DELTA. */
6352 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6353 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
6356 struct iv_ca_delta
*act_delta
, *best_delta
;
6358 comp_cost best_cost
, acost
;
6359 struct iv_cand
*cand
;
6362 best_cost
= iv_ca_cost (ivs
);
6364 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
6366 cand
= iv_cand (data
, i
);
6368 if (cand
== except_cand
)
6371 acost
= iv_ca_narrow (data
, ivs
, cand
, except_cand
, &act_delta
);
6373 if (compare_costs (acost
, best_cost
) < 0)
6376 iv_ca_delta_free (&best_delta
);
6377 best_delta
= act_delta
;
6380 iv_ca_delta_free (&act_delta
);
6389 /* Recurse to possibly remove other unnecessary ivs. */
6390 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6391 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
6392 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
6393 *delta
= iv_ca_delta_join (best_delta
, *delta
);
6397 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6398 cheaper local cost for USE than BEST_CP. Return pointer to
6399 the corresponding cost_pair, otherwise just return BEST_CP. */
6401 static struct cost_pair
*
6402 cheaper_cost_with_cand (struct ivopts_data
*data
, struct iv_use
*use
,
6403 unsigned int cand_idx
, struct iv_cand
*old_cand
,
6404 struct cost_pair
*best_cp
)
6406 struct iv_cand
*cand
;
6407 struct cost_pair
*cp
;
6409 gcc_assert (old_cand
!= NULL
&& best_cp
!= NULL
);
6410 if (cand_idx
== old_cand
->id
)
6413 cand
= iv_cand (data
, cand_idx
);
6414 cp
= get_use_iv_cost (data
, use
, cand
);
6415 if (cp
!= NULL
&& cheaper_cost_pair (cp
, best_cp
))
6421 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6422 which are used by more than one iv uses. For each of those candidates,
6423 this function tries to represent iv uses under that candidate using
6424 other ones with lower local cost, then tries to prune the new set.
6425 If the new set has lower cost, It returns the new cost after recording
6426 candidate replacement in list DELTA. */
6429 iv_ca_replace (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6430 struct iv_ca_delta
**delta
)
6432 bitmap_iterator bi
, bj
;
6433 unsigned int i
, j
, k
;
6435 struct iv_cand
*cand
;
6436 comp_cost orig_cost
, acost
;
6437 struct iv_ca_delta
*act_delta
, *tmp_delta
;
6438 struct cost_pair
*old_cp
, *best_cp
= NULL
;
6441 orig_cost
= iv_ca_cost (ivs
);
6443 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
6445 if (ivs
->n_cand_uses
[i
] == 1
6446 || ivs
->n_cand_uses
[i
] > ALWAYS_PRUNE_CAND_SET_BOUND
)
6449 cand
= iv_cand (data
, i
);
6452 /* Represent uses under current candidate using other ones with
6453 lower local cost. */
6454 for (j
= 0; j
< ivs
->upto
; j
++)
6456 use
= iv_use (data
, j
);
6457 old_cp
= iv_ca_cand_for_use (ivs
, use
);
6459 if (old_cp
->cand
!= cand
)
6463 if (data
->consider_all_candidates
)
6464 for (k
= 0; k
< n_iv_cands (data
); k
++)
6465 best_cp
= cheaper_cost_with_cand (data
, use
, k
,
6466 old_cp
->cand
, best_cp
);
6468 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, k
, bj
)
6469 best_cp
= cheaper_cost_with_cand (data
, use
, k
,
6470 old_cp
->cand
, best_cp
);
6472 if (best_cp
== old_cp
)
6475 act_delta
= iv_ca_delta_add (use
, old_cp
, best_cp
, act_delta
);
6477 /* No need for further prune. */
6481 /* Prune the new candidate set. */
6482 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6483 acost
= iv_ca_prune (data
, ivs
, NULL
, &tmp_delta
);
6484 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6485 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
6487 if (compare_costs (acost
, orig_cost
) < 0)
6493 iv_ca_delta_free (&act_delta
);
6499 /* Tries to extend the sets IVS in the best possible way in order
6500 to express the USE. If ORIGINALP is true, prefer candidates from
6501 the original set of IVs, otherwise favor important candidates not
6502 based on any memory object. */
6505 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6506 struct iv_use
*use
, bool originalp
)
6508 comp_cost best_cost
, act_cost
;
6511 struct iv_cand
*cand
;
6512 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
6513 struct cost_pair
*cp
;
6515 iv_ca_add_use (data
, ivs
, use
);
6516 best_cost
= iv_ca_cost (ivs
);
6517 cp
= iv_ca_cand_for_use (ivs
, use
);
6520 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
6521 iv_ca_set_no_cp (data
, ivs
, use
);
6524 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6525 first try important candidates not based on any memory object. Only if
6526 this fails, try the specific ones. Rationale -- in loops with many
6527 variables the best choice often is to use just one generic biv. If we
6528 added here many ivs specific to the uses, the optimization algorithm later
6529 would be likely to get stuck in a local minimum, thus causing us to create
6530 too many ivs. The approach from few ivs to more seems more likely to be
6531 successful -- starting from few ivs, replacing an expensive use by a
6532 specific iv should always be a win. */
6533 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
6535 cand
= iv_cand (data
, i
);
6537 if (originalp
&& cand
->pos
!=IP_ORIGINAL
)
6540 if (!originalp
&& cand
->iv
->base_object
!= NULL_TREE
)
6543 if (iv_ca_cand_used_p (ivs
, cand
))
6546 cp
= get_use_iv_cost (data
, use
, cand
);
6550 iv_ca_set_cp (data
, ivs
, use
, cp
);
6551 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
,
6553 iv_ca_set_no_cp (data
, ivs
, use
);
6554 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
6556 if (compare_costs (act_cost
, best_cost
) < 0)
6558 best_cost
= act_cost
;
6560 iv_ca_delta_free (&best_delta
);
6561 best_delta
= act_delta
;
6564 iv_ca_delta_free (&act_delta
);
6567 if (infinite_cost_p (best_cost
))
6569 for (i
= 0; i
< use
->n_map_members
; i
++)
6571 cp
= use
->cost_map
+ i
;
6576 /* Already tried this. */
6577 if (cand
->important
)
6579 if (originalp
&& cand
->pos
== IP_ORIGINAL
)
6581 if (!originalp
&& cand
->iv
->base_object
== NULL_TREE
)
6585 if (iv_ca_cand_used_p (ivs
, cand
))
6589 iv_ca_set_cp (data
, ivs
, use
, cp
);
6590 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
, true);
6591 iv_ca_set_no_cp (data
, ivs
, use
);
6592 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
6595 if (compare_costs (act_cost
, best_cost
) < 0)
6597 best_cost
= act_cost
;
6600 iv_ca_delta_free (&best_delta
);
6601 best_delta
= act_delta
;
6604 iv_ca_delta_free (&act_delta
);
6608 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6609 iv_ca_delta_free (&best_delta
);
6611 return !infinite_cost_p (best_cost
);
6614 /* Finds an initial assignment of candidates to uses. */
6616 static struct iv_ca
*
6617 get_initial_solution (struct ivopts_data
*data
, bool originalp
)
6619 struct iv_ca
*ivs
= iv_ca_new (data
);
6622 for (i
= 0; i
< n_iv_uses (data
); i
++)
6623 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
), originalp
))
6632 /* Tries to improve set of induction variables IVS. TRY_REPLACE_P
6633 points to a bool variable, this function tries to break local
6634 optimal fixed-point by replacing candidates in IVS if it's true. */
6637 try_improve_iv_set (struct ivopts_data
*data
,
6638 struct iv_ca
*ivs
, bool *try_replace_p
)
6641 comp_cost acost
, best_cost
= iv_ca_cost (ivs
);
6642 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
6643 struct iv_cand
*cand
;
6645 /* Try extending the set of induction variables by one. */
6646 for (i
= 0; i
< n_iv_cands (data
); i
++)
6648 cand
= iv_cand (data
, i
);
6650 if (iv_ca_cand_used_p (ivs
, cand
))
6653 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
, false);
6657 /* If we successfully added the candidate and the set is small enough,
6658 try optimizing it by removing other candidates. */
6659 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
6661 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6662 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
6663 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6664 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
6667 if (compare_costs (acost
, best_cost
) < 0)
6670 iv_ca_delta_free (&best_delta
);
6671 best_delta
= act_delta
;
6674 iv_ca_delta_free (&act_delta
);
6679 /* Try removing the candidates from the set instead. */
6680 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
6682 if (!best_delta
&& *try_replace_p
)
6684 *try_replace_p
= false;
6685 /* So far candidate selecting algorithm tends to choose fewer IVs
6686 so that it can handle cases in which loops have many variables
6687 but the best choice is often to use only one general biv. One
6688 weakness is it can't handle opposite cases, in which different
6689 candidates should be chosen with respect to each use. To solve
6690 the problem, we replace candidates in a manner described by the
6691 comments of iv_ca_replace, thus give general algorithm a chance
6692 to break local optimal fixed-point in these cases. */
6693 best_cost
= iv_ca_replace (data
, ivs
, &best_delta
);
6700 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6701 gcc_assert (compare_costs (best_cost
, iv_ca_cost (ivs
)) == 0);
6702 iv_ca_delta_free (&best_delta
);
6706 /* Attempts to find the optimal set of induction variables. We do simple
6707 greedy heuristic -- we try to replace at most one candidate in the selected
6708 solution and remove the unused ivs while this improves the cost. */
6710 static struct iv_ca
*
6711 find_optimal_iv_set_1 (struct ivopts_data
*data
, bool originalp
)
6714 bool try_replace_p
= true;
6716 /* Get the initial solution. */
6717 set
= get_initial_solution (data
, originalp
);
6720 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6721 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
6725 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6727 fprintf (dump_file
, "Initial set of candidates:\n");
6728 iv_ca_dump (data
, dump_file
, set
);
6731 while (try_improve_iv_set (data
, set
, &try_replace_p
))
6733 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6735 fprintf (dump_file
, "Improved to:\n");
6736 iv_ca_dump (data
, dump_file
, set
);
6743 static struct iv_ca
*
6744 find_optimal_iv_set (struct ivopts_data
*data
)
6747 struct iv_ca
*set
, *origset
;
6749 comp_cost cost
, origcost
;
6751 /* Determine the cost based on a strategy that starts with original IVs,
6752 and try again using a strategy that prefers candidates not based
6754 origset
= find_optimal_iv_set_1 (data
, true);
6755 set
= find_optimal_iv_set_1 (data
, false);
6757 if (!origset
&& !set
)
6760 origcost
= origset
? iv_ca_cost (origset
) : infinite_cost
;
6761 cost
= set
? iv_ca_cost (set
) : infinite_cost
;
6763 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6765 fprintf (dump_file
, "Original cost %d (complexity %d)\n\n",
6766 origcost
.cost
, origcost
.complexity
);
6767 fprintf (dump_file
, "Final cost %d (complexity %d)\n\n",
6768 cost
.cost
, cost
.complexity
);
6771 /* Choose the one with the best cost. */
6772 if (compare_costs (origcost
, cost
) <= 0)
6779 iv_ca_free (&origset
);
6781 for (i
= 0; i
< n_iv_uses (data
); i
++)
6783 use
= iv_use (data
, i
);
6784 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
6790 /* Creates a new induction variable corresponding to CAND. */
6793 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
6795 gimple_stmt_iterator incr_pos
;
6805 incr_pos
= gsi_last_bb (ip_normal_pos (data
->current_loop
));
6809 incr_pos
= gsi_last_bb (ip_end_pos (data
->current_loop
));
6817 incr_pos
= gsi_for_stmt (cand
->incremented_at
);
6821 /* Mark that the iv is preserved. */
6822 name_info (data
, cand
->var_before
)->preserve_biv
= true;
6823 name_info (data
, cand
->var_after
)->preserve_biv
= true;
6825 /* Rewrite the increment so that it uses var_before directly. */
6826 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
6830 gimple_add_tmp_var (cand
->var_before
);
6832 base
= unshare_expr (cand
->iv
->base
);
6834 create_iv (base
, unshare_expr (cand
->iv
->step
),
6835 cand
->var_before
, data
->current_loop
,
6836 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
6839 /* Creates new induction variables described in SET. */
6842 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
6845 struct iv_cand
*cand
;
6848 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6850 cand
= iv_cand (data
, i
);
6851 create_new_iv (data
, cand
);
6854 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6856 fprintf (dump_file
, "Selected IV set for loop %d",
6857 data
->current_loop
->num
);
6858 if (data
->loop_loc
!= UNKNOWN_LOCATION
)
6859 fprintf (dump_file
, " at %s:%d", LOCATION_FILE (data
->loop_loc
),
6860 LOCATION_LINE (data
->loop_loc
));
6861 fprintf (dump_file
, ", %lu IVs:\n", bitmap_count_bits (set
->cands
));
6862 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6864 cand
= iv_cand (data
, i
);
6865 dump_cand (dump_file
, cand
);
6867 fprintf (dump_file
, "\n");
6871 /* Rewrites USE (definition of iv used in a nonlinear expression)
6872 using candidate CAND. */
6875 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
6876 struct iv_use
*use
, struct iv_cand
*cand
)
6881 gimple_stmt_iterator bsi
;
6883 /* An important special case -- if we are asked to express value of
6884 the original iv by itself, just exit; there is no need to
6885 introduce a new computation (that might also need casting the
6886 variable to unsigned and back). */
6887 if (cand
->pos
== IP_ORIGINAL
6888 && cand
->incremented_at
== use
->stmt
)
6890 enum tree_code stmt_code
;
6892 gcc_assert (is_gimple_assign (use
->stmt
));
6893 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
6895 /* Check whether we may leave the computation unchanged.
6896 This is the case only if it does not rely on other
6897 computations in the loop -- otherwise, the computation
6898 we rely upon may be removed in remove_unused_ivs,
6899 thus leading to ICE. */
6900 stmt_code
= gimple_assign_rhs_code (use
->stmt
);
6901 if (stmt_code
== PLUS_EXPR
6902 || stmt_code
== MINUS_EXPR
6903 || stmt_code
== POINTER_PLUS_EXPR
)
6905 if (gimple_assign_rhs1 (use
->stmt
) == cand
->var_before
)
6906 op
= gimple_assign_rhs2 (use
->stmt
);
6907 else if (gimple_assign_rhs2 (use
->stmt
) == cand
->var_before
)
6908 op
= gimple_assign_rhs1 (use
->stmt
);
6915 if (op
&& expr_invariant_in_loop_p (data
->current_loop
, op
))
6919 comp
= get_computation (data
->current_loop
, use
, cand
);
6920 gcc_assert (comp
!= NULL_TREE
);
6922 switch (gimple_code (use
->stmt
))
6925 tgt
= PHI_RESULT (use
->stmt
);
6927 /* If we should keep the biv, do not replace it. */
6928 if (name_info (data
, tgt
)->preserve_biv
)
6931 bsi
= gsi_after_labels (gimple_bb (use
->stmt
));
6935 tgt
= gimple_assign_lhs (use
->stmt
);
6936 bsi
= gsi_for_stmt (use
->stmt
);
6943 if (!valid_gimple_rhs_p (comp
)
6944 || (gimple_code (use
->stmt
) != GIMPLE_PHI
6945 /* We can't allow re-allocating the stmt as it might be pointed
6947 && (get_gimple_rhs_num_ops (TREE_CODE (comp
))
6948 >= gimple_num_ops (gsi_stmt (bsi
)))))
6950 comp
= force_gimple_operand_gsi (&bsi
, comp
, true, NULL_TREE
,
6951 true, GSI_SAME_STMT
);
6952 if (POINTER_TYPE_P (TREE_TYPE (tgt
)))
6954 duplicate_ssa_name_ptr_info (comp
, SSA_NAME_PTR_INFO (tgt
));
6955 /* As this isn't a plain copy we have to reset alignment
6957 if (SSA_NAME_PTR_INFO (comp
))
6958 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp
));
6962 if (gimple_code (use
->stmt
) == GIMPLE_PHI
)
6964 ass
= gimple_build_assign (tgt
, comp
);
6965 gsi_insert_before (&bsi
, ass
, GSI_SAME_STMT
);
6967 bsi
= gsi_for_stmt (use
->stmt
);
6968 remove_phi_node (&bsi
, false);
6972 gimple_assign_set_rhs_from_tree (&bsi
, comp
);
6973 use
->stmt
= gsi_stmt (bsi
);
6977 /* Performs a peephole optimization to reorder the iv update statement with
6978 a mem ref to enable instruction combining in later phases. The mem ref uses
6979 the iv value before the update, so the reordering transformation requires
6980 adjustment of the offset. CAND is the selected IV_CAND.
6984 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6992 directly propagating t over to (1) will introduce overlapping live range
6993 thus increase register pressure. This peephole transform it into:
6997 t = MEM_REF (base, iv2, 8, 8);
7004 adjust_iv_update_pos (struct iv_cand
*cand
, struct iv_use
*use
)
7007 gimple
*iv_update
, *stmt
;
7009 gimple_stmt_iterator gsi
, gsi_iv
;
7011 if (cand
->pos
!= IP_NORMAL
)
7014 var_after
= cand
->var_after
;
7015 iv_update
= SSA_NAME_DEF_STMT (var_after
);
7017 bb
= gimple_bb (iv_update
);
7018 gsi
= gsi_last_nondebug_bb (bb
);
7019 stmt
= gsi_stmt (gsi
);
7021 /* Only handle conditional statement for now. */
7022 if (gimple_code (stmt
) != GIMPLE_COND
)
7025 gsi_prev_nondebug (&gsi
);
7026 stmt
= gsi_stmt (gsi
);
7027 if (stmt
!= iv_update
)
7030 gsi_prev_nondebug (&gsi
);
7031 if (gsi_end_p (gsi
))
7034 stmt
= gsi_stmt (gsi
);
7035 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
7038 if (stmt
!= use
->stmt
)
7041 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
7044 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7046 fprintf (dump_file
, "Reordering \n");
7047 print_gimple_stmt (dump_file
, iv_update
, 0, 0);
7048 print_gimple_stmt (dump_file
, use
->stmt
, 0, 0);
7049 fprintf (dump_file
, "\n");
7052 gsi
= gsi_for_stmt (use
->stmt
);
7053 gsi_iv
= gsi_for_stmt (iv_update
);
7054 gsi_move_before (&gsi_iv
, &gsi
);
7056 cand
->pos
= IP_BEFORE_USE
;
7057 cand
->incremented_at
= use
->stmt
;
7060 /* Rewrites USE (address that is an iv) using candidate CAND. */
7063 rewrite_use_address_1 (struct ivopts_data
*data
,
7064 struct iv_use
*use
, struct iv_cand
*cand
)
7067 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
7068 tree base_hint
= NULL_TREE
;
7072 adjust_iv_update_pos (cand
, use
);
7073 ok
= get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
7075 unshare_aff_combination (&aff
);
7077 /* To avoid undefined overflow problems, all IV candidates use unsigned
7078 integer types. The drawback is that this makes it impossible for
7079 create_mem_ref to distinguish an IV that is based on a memory object
7080 from one that represents simply an offset.
7082 To work around this problem, we pass a hint to create_mem_ref that
7083 indicates which variable (if any) in aff is an IV based on a memory
7084 object. Note that we only consider the candidate. If this is not
7085 based on an object, the base of the reference is in some subexpression
7086 of the use -- but these will use pointer types, so they are recognized
7087 by the create_mem_ref heuristics anyway. */
7088 if (cand
->iv
->base_object
)
7089 base_hint
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
7091 iv
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
7092 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
,
7093 reference_alias_ptr_type (*use
->op_p
),
7094 iv
, base_hint
, data
->speed
);
7095 copy_ref_info (ref
, *use
->op_p
);
7099 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
7100 first use of a group, rewrites sub uses in the group too. */
7103 rewrite_use_address (struct ivopts_data
*data
,
7104 struct iv_use
*use
, struct iv_cand
*cand
)
7106 struct iv_use
*next
;
7108 gcc_assert (use
->sub_id
== 0);
7109 rewrite_use_address_1 (data
, use
, cand
);
7110 update_stmt (use
->stmt
);
7112 for (next
= use
->next
; next
!= NULL
; next
= next
->next
)
7114 rewrite_use_address_1 (data
, next
, cand
);
7115 update_stmt (next
->stmt
);
7121 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7125 rewrite_use_compare (struct ivopts_data
*data
,
7126 struct iv_use
*use
, struct iv_cand
*cand
)
7128 tree comp
, *var_p
, op
, bound
;
7129 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
7130 enum tree_code compare
;
7131 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
7137 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
7138 tree var_type
= TREE_TYPE (var
);
7141 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7143 fprintf (dump_file
, "Replacing exit test: ");
7144 print_gimple_stmt (dump_file
, use
->stmt
, 0, TDF_SLIM
);
7147 bound
= unshare_expr (fold_convert (var_type
, bound
));
7148 op
= force_gimple_operand (bound
, &stmts
, true, NULL_TREE
);
7150 gsi_insert_seq_on_edge_immediate (
7151 loop_preheader_edge (data
->current_loop
),
7154 gcond
*cond_stmt
= as_a
<gcond
*> (use
->stmt
);
7155 gimple_cond_set_lhs (cond_stmt
, var
);
7156 gimple_cond_set_code (cond_stmt
, compare
);
7157 gimple_cond_set_rhs (cond_stmt
, op
);
7161 /* The induction variable elimination failed; just express the original
7163 comp
= get_computation (data
->current_loop
, use
, cand
);
7164 gcc_assert (comp
!= NULL_TREE
);
7166 ok
= extract_cond_operands (data
, use
->stmt
, &var_p
, NULL
, NULL
, NULL
);
7169 *var_p
= force_gimple_operand_gsi (&bsi
, comp
, true, SSA_NAME_VAR (*var_p
),
7170 true, GSI_SAME_STMT
);
7173 /* Rewrites USE using candidate CAND. */
7176 rewrite_use (struct ivopts_data
*data
, struct iv_use
*use
, struct iv_cand
*cand
)
7180 case USE_NONLINEAR_EXPR
:
7181 rewrite_use_nonlinear_expr (data
, use
, cand
);
7185 rewrite_use_address (data
, use
, cand
);
7189 rewrite_use_compare (data
, use
, cand
);
7196 update_stmt (use
->stmt
);
7199 /* Rewrite the uses using the selected induction variables. */
7202 rewrite_uses (struct ivopts_data
*data
)
7205 struct iv_cand
*cand
;
7208 for (i
= 0; i
< n_iv_uses (data
); i
++)
7210 use
= iv_use (data
, i
);
7211 cand
= use
->selected
;
7214 rewrite_use (data
, use
, cand
);
7218 /* Removes the ivs that are not used after rewriting. */
7221 remove_unused_ivs (struct ivopts_data
*data
)
7225 bitmap toremove
= BITMAP_ALLOC (NULL
);
7227 /* Figure out an order in which to release SSA DEFs so that we don't
7228 release something that we'd have to propagate into a debug stmt
7230 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
7232 struct version_info
*info
;
7234 info
= ver_info (data
, j
);
7236 && !integer_zerop (info
->iv
->step
)
7238 && !info
->iv
->have_use_for
7239 && !info
->preserve_biv
)
7241 bitmap_set_bit (toremove
, SSA_NAME_VERSION (info
->iv
->ssa_name
));
7243 tree def
= info
->iv
->ssa_name
;
7245 if (MAY_HAVE_DEBUG_STMTS
&& SSA_NAME_DEF_STMT (def
))
7247 imm_use_iterator imm_iter
;
7248 use_operand_p use_p
;
7252 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
7254 if (!gimple_debug_bind_p (stmt
))
7257 /* We just want to determine whether to do nothing
7258 (count == 0), to substitute the computed
7259 expression into a single use of the SSA DEF by
7260 itself (count == 1), or to use a debug temp
7261 because the SSA DEF is used multiple times or as
7262 part of a larger expression (count > 1). */
7264 if (gimple_debug_bind_get_value (stmt
) != def
)
7268 BREAK_FROM_IMM_USE_STMT (imm_iter
);
7274 struct iv_use dummy_use
;
7275 struct iv_cand
*best_cand
= NULL
, *cand
;
7276 unsigned i
, best_pref
= 0, cand_pref
;
7278 memset (&dummy_use
, 0, sizeof (dummy_use
));
7279 dummy_use
.iv
= info
->iv
;
7280 for (i
= 0; i
< n_iv_uses (data
) && i
< 64; i
++)
7282 cand
= iv_use (data
, i
)->selected
;
7283 if (cand
== best_cand
)
7285 cand_pref
= operand_equal_p (cand
->iv
->step
,
7289 += TYPE_MODE (TREE_TYPE (cand
->iv
->base
))
7290 == TYPE_MODE (TREE_TYPE (info
->iv
->base
))
7293 += TREE_CODE (cand
->iv
->base
) == INTEGER_CST
7295 if (best_cand
== NULL
|| best_pref
< cand_pref
)
7298 best_pref
= cand_pref
;
7305 tree comp
= get_computation_at (data
->current_loop
,
7306 &dummy_use
, best_cand
,
7307 SSA_NAME_DEF_STMT (def
));
7313 tree vexpr
= make_node (DEBUG_EXPR_DECL
);
7314 DECL_ARTIFICIAL (vexpr
) = 1;
7315 TREE_TYPE (vexpr
) = TREE_TYPE (comp
);
7316 if (SSA_NAME_VAR (def
))
7317 DECL_MODE (vexpr
) = DECL_MODE (SSA_NAME_VAR (def
));
7319 DECL_MODE (vexpr
) = TYPE_MODE (TREE_TYPE (vexpr
));
7321 = gimple_build_debug_bind (vexpr
, comp
, NULL
);
7322 gimple_stmt_iterator gsi
;
7324 if (gimple_code (SSA_NAME_DEF_STMT (def
)) == GIMPLE_PHI
)
7325 gsi
= gsi_after_labels (gimple_bb
7326 (SSA_NAME_DEF_STMT (def
)));
7328 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (def
));
7330 gsi_insert_before (&gsi
, def_temp
, GSI_SAME_STMT
);
7334 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
7336 if (!gimple_debug_bind_p (stmt
))
7339 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
7340 SET_USE (use_p
, comp
);
7348 release_defs_bitset (toremove
);
7350 BITMAP_FREE (toremove
);
7353 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7354 for hash_map::traverse. */
7357 free_tree_niter_desc (edge
const &, tree_niter_desc
*const &value
, void *)
7363 /* Frees data allocated by the optimization of a single loop. */
7366 free_loop_data (struct ivopts_data
*data
)
7374 data
->niters
->traverse
<void *, free_tree_niter_desc
> (NULL
);
7375 delete data
->niters
;
7376 data
->niters
= NULL
;
7379 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
7381 struct version_info
*info
;
7383 info
= ver_info (data
, i
);
7385 info
->has_nonlin_use
= false;
7386 info
->preserve_biv
= false;
7389 bitmap_clear (data
->relevant
);
7390 bitmap_clear (data
->important_candidates
);
7392 for (i
= 0; i
< n_iv_uses (data
); i
++)
7394 struct iv_use
*use
= iv_use (data
, i
);
7395 struct iv_use
*pre
= use
, *sub
= use
->next
;
7399 gcc_assert (sub
->related_cands
== NULL
);
7400 gcc_assert (sub
->n_map_members
== 0 && sub
->cost_map
== NULL
);
7407 BITMAP_FREE (use
->related_cands
);
7408 for (j
= 0; j
< use
->n_map_members
; j
++)
7409 if (use
->cost_map
[j
].depends_on
)
7410 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
7411 free (use
->cost_map
);
7414 data
->iv_uses
.truncate (0);
7416 for (i
= 0; i
< n_iv_cands (data
); i
++)
7418 struct iv_cand
*cand
= iv_cand (data
, i
);
7420 if (cand
->depends_on
)
7421 BITMAP_FREE (cand
->depends_on
);
7424 data
->iv_candidates
.truncate (0);
7426 if (data
->version_info_size
< num_ssa_names
)
7428 data
->version_info_size
= 2 * num_ssa_names
;
7429 free (data
->version_info
);
7430 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
7433 data
->max_inv_id
= 0;
7435 FOR_EACH_VEC_ELT (decl_rtl_to_reset
, i
, obj
)
7436 SET_DECL_RTL (obj
, NULL_RTX
);
7438 decl_rtl_to_reset
.truncate (0);
7440 data
->inv_expr_tab
->empty ();
7441 data
->inv_expr_id
= 0;
7444 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
7448 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
7450 free_loop_data (data
);
7451 free (data
->version_info
);
7452 BITMAP_FREE (data
->relevant
);
7453 BITMAP_FREE (data
->important_candidates
);
7455 decl_rtl_to_reset
.release ();
7456 data
->iv_uses
.release ();
7457 data
->iv_candidates
.release ();
7458 delete data
->inv_expr_tab
;
7459 data
->inv_expr_tab
= NULL
;
7460 free_affine_expand_cache (&data
->name_expansion_cache
);
7461 obstack_free (&data
->iv_obstack
, NULL
);
7464 /* Returns true if the loop body BODY includes any function calls. */
7467 loop_body_includes_call (basic_block
*body
, unsigned num_nodes
)
7469 gimple_stmt_iterator gsi
;
7472 for (i
= 0; i
< num_nodes
; i
++)
7473 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
7475 gimple
*stmt
= gsi_stmt (gsi
);
7476 if (is_gimple_call (stmt
)
7477 && !is_inexpensive_builtin (gimple_call_fndecl (stmt
)))
7483 /* Optimizes the LOOP. Returns true if anything changed. */
7486 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
7488 bool changed
= false;
7489 struct iv_ca
*iv_ca
;
7490 edge exit
= single_dom_exit (loop
);
7493 gcc_assert (!data
->niters
);
7494 data
->current_loop
= loop
;
7495 data
->loop_loc
= find_loop_location (loop
);
7496 data
->speed
= optimize_loop_for_speed_p (loop
);
7498 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7500 fprintf (dump_file
, "Processing loop %d", loop
->num
);
7501 if (data
->loop_loc
!= UNKNOWN_LOCATION
)
7502 fprintf (dump_file
, " at %s:%d", LOCATION_FILE (data
->loop_loc
),
7503 LOCATION_LINE (data
->loop_loc
));
7504 fprintf (dump_file
, "\n");
7508 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
7509 exit
->src
->index
, exit
->dest
->index
);
7510 print_gimple_stmt (dump_file
, last_stmt (exit
->src
), 0, TDF_SLIM
);
7511 fprintf (dump_file
, "\n");
7514 fprintf (dump_file
, "\n");
7517 body
= get_loop_body (loop
);
7518 data
->body_includes_call
= loop_body_includes_call (body
, loop
->num_nodes
);
7519 renumber_gimple_stmt_uids_in_blocks (body
, loop
->num_nodes
);
7522 data
->loop_single_exit_p
= exit
!= NULL
&& loop_only_exit_p (loop
, exit
);
7524 /* For each ssa name determines whether it behaves as an induction variable
7526 if (!find_induction_variables (data
))
7529 /* Finds interesting uses (item 1). */
7530 find_interesting_uses (data
);
7531 group_address_uses (data
);
7532 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
7535 /* Finds candidates for the induction variables (item 2). */
7536 find_iv_candidates (data
);
7538 /* Calculates the costs (item 3, part 1). */
7539 determine_iv_costs (data
);
7540 determine_use_iv_costs (data
);
7541 determine_set_costs (data
);
7543 /* Find the optimal set of induction variables (item 3, part 2). */
7544 iv_ca
= find_optimal_iv_set (data
);
7549 /* Create the new induction variables (item 4, part 1). */
7550 create_new_ivs (data
, iv_ca
);
7551 iv_ca_free (&iv_ca
);
7553 /* Rewrite the uses (item 4, part 2). */
7554 rewrite_uses (data
);
7556 /* Remove the ivs that are unused after rewriting. */
7557 remove_unused_ivs (data
);
7559 /* We have changed the structure of induction variables; it might happen
7560 that definitions in the scev database refer to some of them that were
7565 free_loop_data (data
);
7570 /* Main entry point. Optimizes induction variables in loops. */
7573 tree_ssa_iv_optimize (void)
7576 struct ivopts_data data
;
7578 tree_ssa_iv_optimize_init (&data
);
7580 /* Optimize the loops starting with the innermost ones. */
7581 FOR_EACH_LOOP (loop
, LI_FROM_INNERMOST
)
7583 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7584 flow_loop_dump (loop
, dump_file
, NULL
, 1);
7586 tree_ssa_iv_optimize_loop (&data
, loop
);
7589 tree_ssa_iv_optimize_finalize (&data
);