/* Induction variable optimizations.
   Copyright (C) 2003-2016 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

      Note the interesting uses are categorized and handled in groups.
      Generally, address type uses are grouped together if their iv bases
      differ only in constant offset.

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The group/use costs.  Each of the interesting groups/uses chooses
         the best induction variable in the set and adds its cost to the sum.
         The cost reflects the time spent on modifying the induction variable's
         value to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
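/* Illustration (not part of the original sources; the arrays are made up):
   on a target with base+offset addressing, the combined effect of the steps
   above typically rewrites a loop such as

       for (i = 0; i < n; i++)
         a[i] = b[i];

   so that both memory accesses go through pointer induction variables that
   are advanced by the element size each iteration, instead of recomputing
   &a[i] and &b[i] from i -- i.e. strength reduction plus iv coalescing, with
   the original counter possibly eliminated if it is only used in the exit
   test.  */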
#include "coretypes.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "tree-scalar-evolution.h"
#include "tree-affine.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"
/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5
/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);

  if (niter == -1)
    {
      niter = likely_max_stmt_executions_int (loop);

      if (niter == -1 || niter > AVG_LOOP_NITER (loop))
        return AVG_LOOP_NITER (loop);
    }

  return niter;
}
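/* Worked example (illustrative only, not from the original sources): if
   profile data estimates 1000 executions of the loop body, avg_loop_niter
   returns 1000; if no profile exists but a likely bound of 3 iterations is
   known, it returns 3; if the likely bound is large or unknown, it falls
   back to AVG_LOOP_NITER, i.e. 5.  */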
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  struct iv_use *nonlin_use;	/* The identifier in the use if it is the case.  */
  bool biv_p;		/* Is it a biv?  */
  bool no_overflow;	/* True if the iv doesn't overflow.  */
  bool have_address_use;/* For biv, indicate if it's used in any address
			   type use.  */
};
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
/* Cost of a computation.  */
struct comp_cost
{
  comp_cost (): cost (0), complexity (0), scratch (0)
  {}

  comp_cost (int cost, unsigned complexity, int scratch = 0)
    : cost (cost), complexity (complexity), scratch (scratch)
  {}

  /* Returns true if COST is infinite.  */
  bool infinite_cost_p ();

  /* Adds costs COST1 and COST2.  */
  friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);

  /* Adds COST to the comp_cost.  */
  comp_cost operator+= (comp_cost cost);

  /* Adds constant C to this comp_cost.  */
  comp_cost operator+= (HOST_WIDE_INT c);

  /* Subtracts constant C from this comp_cost.  */
  comp_cost operator-= (HOST_WIDE_INT c);

  /* Divide the comp_cost by constant C.  */
  comp_cost operator/= (HOST_WIDE_INT c);

  /* Multiply the comp_cost by constant C.  */
  comp_cost operator*= (HOST_WIDE_INT c);

  /* Subtracts costs COST1 and COST2.  */
  friend comp_cost operator- (comp_cost cost1, comp_cost cost2);

  /* Subtracts COST from this comp_cost.  */
  comp_cost operator-= (comp_cost cost);

  /* Returns true if COST1 is smaller than COST2.  */
  friend bool operator< (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 and COST2 are equal.  */
  friend bool operator== (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 is smaller than or equal to COST2.  */
  friend bool operator<= (comp_cost cost1, comp_cost cost2);

  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
  int scratch;		/* Scratch used during cost computation.  */
};

static const comp_cost no_cost;
static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
bool
comp_cost::infinite_cost_p ()
{
  return cost == INFTY;
}

comp_cost
operator+ (comp_cost cost1, comp_cost cost2)
{
  if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
    return infinite_cost;

  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

comp_cost
operator- (comp_cost cost1, comp_cost cost2)
{
  if (cost1.infinite_cost_p ())
    return infinite_cost;

  gcc_assert (!cost2.infinite_cost_p ());

  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

comp_cost
comp_cost::operator+= (comp_cost cost)
{
  *this = *this + cost;
  return *this;
}

comp_cost
comp_cost::operator+= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost += c;

  return *this;
}

comp_cost
comp_cost::operator-= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost -= c;

  return *this;
}

comp_cost
comp_cost::operator/= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost /= c;

  return *this;
}

comp_cost
comp_cost::operator*= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost *= c;

  return *this;
}

comp_cost
comp_cost::operator-= (comp_cost cost)
{
  *this = *this - cost;
  return *this;
}

bool
operator< (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity < cost2.complexity;

  return cost1.cost < cost2.cost;
}

bool
operator== (comp_cost cost1, comp_cost cost2)
{
  return cost1.cost == cost2.cost
	 && cost1.complexity == cost2.complexity;
}

bool
operator<= (comp_cost cost1, comp_cost cost2)
{
  return cost1 < cost2 || cost1 == cost2;
}
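/* Illustration (not part of the original sources): the comparison operators
   order costs lexicographically -- the runtime COST field decides first and
   COMPLEXITY only breaks ties.  For example, with the constructors above,
   comp_cost (3, 9) < comp_cost (4, 0), and comp_cost (4, 1) < comp_cost (4, 2)
   because the runtime costs are equal and 1 < 2 on complexity.  */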
struct iv_inv_expr_ent;

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  iv_inv_expr_ent *inv_expr;	/* Loop invariant expression.  */
};
/* Use of an induction variable.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned group_id;	/* The group id the use belongs to.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple *stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */

  tree addr_base;	/* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
			/* Const offset stripped from base address.  */
};
/* Group of uses.  */
struct iv_group
{
  /* The id of the group.  */
  unsigned id;
  /* Uses of the group are of the same type.  */
  enum use_type type;
  /* The set of "related" IV candidates, plus the important ones.  */
  bitmap related_cands;
  /* Number of IV candidates in the cost_map.  */
  unsigned n_map_members;
  /* The costs w.r.t. the iv candidates.  */
  struct cost_pair *cost_map;
  /* The selected candidate for the group.  */
  struct iv_cand *selected;
  /* Uses in the group.  */
  vec<struct iv_use *> vuses;
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple *incremented_at;/* For original biv, the statement where it is
			    incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use;	/* For IP_{BEFORE,AFTER}_USE candidates, the place
				   where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
  struct iv *orig_iv;	/* The original iv if this cand is added from biv with
			   smaller type.  */
};
/* Hashtable entry for common candidate derived from iv uses.  */
struct iv_common_cand
{
  tree base;
  tree step;
  /* IV uses from which this common candidate is derived.  */
  auto_vec<struct iv_use *> uses;
  hashval_t hash;
};
/* Hashtable helpers.  */

struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
{
  static inline hashval_t hash (const iv_common_cand *);
  static inline bool equal (const iv_common_cand *, const iv_common_cand *);
};

/* Hash function for possible common candidates.  */

inline hashval_t
iv_common_cand_hasher::hash (const iv_common_cand *ccand)
{
  return ccand->hash;
}

/* Hash table equality function for common candidates.  */

inline bool
iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
			      const iv_common_cand *ccand2)
{
  return (ccand1->hash == ccand2->hash
	  && operand_equal_p (ccand1->base, ccand2->base, 0)
	  && operand_equal_p (ccand1->step, ccand2->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
	      == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
}
/* Loop invariant expression hashtable entry.  */

struct iv_inv_expr_ent
{
  /* Tree expression of the entry.  */
  tree expr;
  /* Unique identifier.  */
  int id;
  /* Hash value.  */
  hashval_t hash;
};

/* Sort iv_inv_expr_ent pair A and B by id field.  */

static int
sort_iv_inv_expr_ent (const void *a, const void *b)
{
  const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
  const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);

  unsigned id1 = (*e1)->id;
  unsigned id2 = (*e2)->id;

  if (id1 < id2)
    return -1;
  else if (id1 > id2)
    return 1;
  else
    return 0;
}
/* Hashtable helpers.  */

struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
{
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
			   const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int max_inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_group *> vgroups;

  /* The candidates.  */
  vec<iv_cand *> vcands;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The hashtable of common candidates derived from iv uses.  */
  hash_table<iv_common_cand_hasher> *iv_common_cand_tab;

  /* The common candidates.  */
  vec<iv_common_cand *> iv_common_cands;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Number of no_overflow BIVs which are not used in memory address.  */
  unsigned bivs_not_used_in_addr;

  /* Obstack for iv structure.  */
  struct obstack iv_obstack;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_groups;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_group;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Hash set with used invariant expression.  */
  hash_map <iv_inv_expr_ent *, unsigned> *used_inv_exprs;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed group.  */
  struct iv_group *group;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next;
};
/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_GROUPS \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  Don't dump
   variable's name if DUMP_NAME is FALSE.  The information is dumped with
   preceding spaces indicated by INDENT_LEVEL.  */

void
dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
{
  const char *p;
  const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};

  if (indent_level > 4)
    indent_level = 4;
  p = spaces + 8 - (indent_level << 1);

  fprintf (file, "%sIV struct:\n", p);
  if (iv->ssa_name && dump_name)
    {
      fprintf (file, "%s  SSA_NAME:\t", p);
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "%s  Type:\t", p);
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "%s  Base:\t", p);
  print_generic_expr (file, iv->base, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "%s  Step:\t", p);
  print_generic_expr (file, iv->step, TDF_SLIM);
  fprintf (file, "\n");

  if (iv->base_object)
    {
      fprintf (file, "%s  Object:\t", p);
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');

  fprintf (file, "%s  Overflowness wrto loop niter:\t%s\n",
	   p, iv->no_overflow ? "No-overflow" : "Overflow");
}
/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
  fprintf (file, "    At stmt:\t");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "    At pos:\t");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");
  dump_iv (file, use->iv, false, 2);
}
/* Dumps information about the uses to FILE.  */

void
dump_groups (FILE *file, struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_group *group;

  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];
      fprintf (file, "Group %d:\n", group->id);
      if (group->type == USE_NONLINEAR_EXPR)
	fprintf (file, "  Type:\tGENERIC\n");
      else if (group->type == USE_ADDRESS)
	fprintf (file, "  Type:\tADDRESS\n");
      else
	{
	  gcc_assert (group->type == USE_COMPARE);
	  fprintf (file, "  Type:\tCOMPARE\n");
	}

      for (j = 0; j < group->vuses.length (); j++)
	dump_use (file, group->vuses[j]);
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "Candidate %d:\n", cand->id);
  if (cand->depends_on)
    {
      fprintf (file, "  Depend on: ");
      dump_bitmap (file, cand->depends_on);
    }

  if (cand->var_before)
    {
      fprintf (file, "  Var before: ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  Var after: ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  Incr POS: before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  Incr POS: at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  Incr POS: orig biv\n");
      break;
    }

  dump_iv (file, iv, false, 1);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
847 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
851 stmt_after_ip_normal_pos (struct loop
*loop
, gimple
*stmt
)
853 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
857 if (sbb
== loop
->latch
)
863 return stmt
== last_stmt (bb
);
866 /* Returns true if STMT if after the place where the original induction
867 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
868 if the positions are identical. */
871 stmt_after_inc_pos (struct iv_cand
*cand
, gimple
*stmt
, bool true_if_equal
)
873 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
874 basic_block stmt_bb
= gimple_bb (stmt
);
876 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
879 if (stmt_bb
!= cand_bb
)
883 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
885 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
888 /* Returns true if STMT if after the place where the induction variable
889 CAND is incremented in LOOP. */
892 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple
*stmt
)
900 return stmt_after_ip_normal_pos (loop
, stmt
);
904 return stmt_after_inc_pos (cand
, stmt
, false);
907 return stmt_after_inc_pos (cand
, stmt
, true);
914 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
917 abnormal_ssa_name_p (tree exp
)
922 if (TREE_CODE (exp
) != SSA_NAME
)
925 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
928 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
929 abnormal phi node. Callback for for_each_index. */
932 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
933 void *data ATTRIBUTE_UNUSED
)
935 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
937 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
939 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
943 return !abnormal_ssa_name_p (*index
);
946 /* Returns true if EXPR contains a ssa name that occurs in an
947 abnormal phi node. */
950 contains_abnormal_ssa_name_p (tree expr
)
953 enum tree_code_class codeclass
;
958 code
= TREE_CODE (expr
);
959 codeclass
= TREE_CODE_CLASS (code
);
961 if (code
== SSA_NAME
)
962 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
964 if (code
== INTEGER_CST
965 || is_gimple_min_invariant (expr
))
968 if (code
== ADDR_EXPR
)
969 return !for_each_index (&TREE_OPERAND (expr
, 0),
970 idx_contains_abnormal_ssa_name_p
,
973 if (code
== COND_EXPR
)
974 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0))
975 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1))
976 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 2));
982 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
987 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
999 /* Returns the structure describing number of iterations determined from
1000 EXIT of DATA->current_loop, or NULL if something goes wrong. */
1002 static struct tree_niter_desc
*
1003 niter_for_exit (struct ivopts_data
*data
, edge exit
)
1005 struct tree_niter_desc
*desc
;
1006 tree_niter_desc
**slot
;
1010 data
->niters
= new hash_map
<edge
, tree_niter_desc
*>;
1014 slot
= data
->niters
->get (exit
);
1018 /* Try to determine number of iterations. We cannot safely work with ssa
1019 names that appear in phi nodes on abnormal edges, so that we do not
1020 create overlapping life ranges for them (PR 27283). */
1021 desc
= XNEW (struct tree_niter_desc
);
1022 if (!number_of_iterations_exit (data
->current_loop
,
1024 || contains_abnormal_ssa_name_p (desc
->niter
))
1029 data
->niters
->put (exit
, desc
);
1037 /* Returns the structure describing number of iterations determined from
1038 single dominating exit of DATA->current_loop, or NULL if something
1041 static struct tree_niter_desc
*
1042 niter_for_single_dom_exit (struct ivopts_data
*data
)
1044 edge exit
= single_dom_exit (data
->current_loop
);
1049 return niter_for_exit (data
, exit
);
1052 /* Initializes data structures used by the iv optimization pass, stored
1056 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
1058 data
->version_info_size
= 2 * num_ssa_names
;
1059 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
1060 data
->relevant
= BITMAP_ALLOC (NULL
);
1061 data
->important_candidates
= BITMAP_ALLOC (NULL
);
1062 data
->max_inv_id
= 0;
1063 data
->niters
= NULL
;
1064 data
->vgroups
.create (20);
1065 data
->vcands
.create (20);
1066 data
->inv_expr_tab
= new hash_table
<iv_inv_expr_hasher
> (10);
1067 data
->max_inv_expr_id
= 0;
1068 data
->name_expansion_cache
= NULL
;
1069 data
->iv_common_cand_tab
= new hash_table
<iv_common_cand_hasher
> (10);
1070 data
->iv_common_cands
.create (20);
1071 decl_rtl_to_reset
.create (20);
1072 gcc_obstack_init (&data
->iv_obstack
);
1075 /* Returns a memory object to that EXPR points. In case we are able to
1076 determine that it does not point to any such object, NULL is returned. */
1079 determine_base_object (tree expr
)
1081 enum tree_code code
= TREE_CODE (expr
);
1084 /* If this is a pointer casted to any type, we need to determine
1085 the base object for the pointer; so handle conversions before
1086 throwing away non-pointer expressions. */
1087 if (CONVERT_EXPR_P (expr
))
1088 return determine_base_object (TREE_OPERAND (expr
, 0));
1090 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
1099 obj
= TREE_OPERAND (expr
, 0);
1100 base
= get_base_address (obj
);
1105 if (TREE_CODE (base
) == MEM_REF
)
1106 return determine_base_object (TREE_OPERAND (base
, 0));
1108 return fold_convert (ptr_type_node
,
1109 build_fold_addr_expr (base
));
1111 case POINTER_PLUS_EXPR
:
1112 return determine_base_object (TREE_OPERAND (expr
, 0));
1116 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
1120 return fold_convert (ptr_type_node
, expr
);
1124 /* Return true if address expression with non-DECL_P operand appears
1128 contain_complex_addr_expr (tree expr
)
1133 switch (TREE_CODE (expr
))
1135 case POINTER_PLUS_EXPR
:
1138 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 0));
1139 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 1));
1143 return (!DECL_P (TREE_OPERAND (expr
, 0)));
1152 /* Allocates an induction variable with given initial value BASE and step STEP
1153 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1156 alloc_iv (struct ivopts_data
*data
, tree base
, tree step
,
1157 bool no_overflow
= false)
1160 struct iv
*iv
= (struct iv
*) obstack_alloc (&data
->iv_obstack
,
1161 sizeof (struct iv
));
1162 gcc_assert (step
!= NULL_TREE
);
1164 /* Lower address expression in base except ones with DECL_P as operand.
1166 1) More accurate cost can be computed for address expressions;
1167 2) Duplicate candidates won't be created for bases in different
1168 forms, like &a[0] and &a. */
1170 if ((TREE_CODE (expr
) == ADDR_EXPR
&& !DECL_P (TREE_OPERAND (expr
, 0)))
1171 || contain_complex_addr_expr (expr
))
1174 tree_to_aff_combination (expr
, TREE_TYPE (base
), &comb
);
1175 base
= fold_convert (TREE_TYPE (base
), aff_combination_to_tree (&comb
));
1179 iv
->base_object
= determine_base_object (base
);
1182 iv
->nonlin_use
= NULL
;
1183 iv
->ssa_name
= NULL_TREE
;
1184 iv
->no_overflow
= no_overflow
;
1185 iv
->have_address_use
= false;
1190 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1191 doesn't overflow. */
1194 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
,
1197 struct version_info
*info
= name_info (data
, iv
);
1199 gcc_assert (!info
->iv
);
1201 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
1202 info
->iv
= alloc_iv (data
, base
, step
, no_overflow
);
1203 info
->iv
->ssa_name
= iv
;
1206 /* Finds induction variable declaration for VAR. */
1209 get_iv (struct ivopts_data
*data
, tree var
)
1212 tree type
= TREE_TYPE (var
);
1214 if (!POINTER_TYPE_P (type
)
1215 && !INTEGRAL_TYPE_P (type
))
1218 if (!name_info (data
, var
)->iv
)
1220 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1223 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
1224 set_iv (data
, var
, var
, build_int_cst (type
, 0), true);
1227 return name_info (data
, var
)->iv
;
1230 /* Return the first non-invariant ssa var found in EXPR. */
1233 extract_single_var_from_expr (tree expr
)
1237 enum tree_code code
;
1239 if (!expr
|| is_gimple_min_invariant (expr
))
1242 code
= TREE_CODE (expr
);
1243 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code
)))
1245 n
= TREE_OPERAND_LENGTH (expr
);
1246 for (i
= 0; i
< n
; i
++)
1248 tmp
= extract_single_var_from_expr (TREE_OPERAND (expr
, i
));
1254 return (TREE_CODE (expr
) == SSA_NAME
) ? expr
: NULL
;
1257 /* Finds basic ivs. */
1260 find_bivs (struct ivopts_data
*data
)
1264 tree step
, type
, base
, stop
;
1266 struct loop
*loop
= data
->current_loop
;
1269 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1273 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
1276 if (virtual_operand_p (PHI_RESULT (phi
)))
1279 if (!simple_iv (loop
, loop
, PHI_RESULT (phi
), &iv
, true))
1282 if (integer_zerop (iv
.step
))
1286 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1287 /* Stop expanding iv base at the first ssa var referred by iv step.
1288 Ideally we should stop at any ssa var, because that's expensive
1289 and unusual to happen, we just do it on the first one.
1291 See PR64705 for the rationale. */
1292 stop
= extract_single_var_from_expr (step
);
1293 base
= expand_simple_operations (base
, stop
);
1294 if (contains_abnormal_ssa_name_p (base
)
1295 || contains_abnormal_ssa_name_p (step
))
1298 type
= TREE_TYPE (PHI_RESULT (phi
));
1299 base
= fold_convert (type
, base
);
1302 if (POINTER_TYPE_P (type
))
1303 step
= convert_to_ptrofftype (step
);
1305 step
= fold_convert (type
, step
);
1308 set_iv (data
, PHI_RESULT (phi
), base
, step
, iv
.no_overflow
);
1315 /* Marks basic ivs. */
1318 mark_bivs (struct ivopts_data
*data
)
1323 struct iv
*iv
, *incr_iv
;
1324 struct loop
*loop
= data
->current_loop
;
1325 basic_block incr_bb
;
1328 data
->bivs_not_used_in_addr
= 0;
1329 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1333 iv
= get_iv (data
, PHI_RESULT (phi
));
1337 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1338 def
= SSA_NAME_DEF_STMT (var
);
1339 /* Don't mark iv peeled from other one as biv. */
1341 && gimple_code (def
) == GIMPLE_PHI
1342 && gimple_bb (def
) == loop
->header
)
1345 incr_iv
= get_iv (data
, var
);
1349 /* If the increment is in the subloop, ignore it. */
1350 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1351 if (incr_bb
->loop_father
!= data
->current_loop
1352 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1356 incr_iv
->biv_p
= true;
1357 if (iv
->no_overflow
)
1358 data
->bivs_not_used_in_addr
++;
1359 if (incr_iv
->no_overflow
)
1360 data
->bivs_not_used_in_addr
++;
1364 /* Checks whether STMT defines a linear induction variable and stores its
1365 parameters to IV. */
1368 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple
*stmt
, affine_iv
*iv
)
1371 struct loop
*loop
= data
->current_loop
;
1373 iv
->base
= NULL_TREE
;
1374 iv
->step
= NULL_TREE
;
1376 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1379 lhs
= gimple_assign_lhs (stmt
);
1380 if (TREE_CODE (lhs
) != SSA_NAME
)
1383 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1386 /* Stop expanding iv base at the first ssa var referred by iv step.
1387 Ideally we should stop at any ssa var, because that's expensive
1388 and unusual to happen, we just do it on the first one.
1390 See PR64705 for the rationale. */
1391 stop
= extract_single_var_from_expr (iv
->step
);
1392 iv
->base
= expand_simple_operations (iv
->base
, stop
);
1393 if (contains_abnormal_ssa_name_p (iv
->base
)
1394 || contains_abnormal_ssa_name_p (iv
->step
))
1397 /* If STMT could throw, then do not consider STMT as defining a GIV.
1398 While this will suppress optimizations, we can not safely delete this
1399 GIV and associated statements, even if it appears it is not used. */
1400 if (stmt_could_throw_p (stmt
))
1406 /* Finds general ivs in statement STMT. */
1409 find_givs_in_stmt (struct ivopts_data
*data
, gimple
*stmt
)
1413 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1416 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
, iv
.no_overflow
);
1419 /* Finds general ivs in basic block BB. */
1422 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1424 gimple_stmt_iterator bsi
;
1426 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1427 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1430 /* Finds general ivs. */
1433 find_givs (struct ivopts_data
*data
)
1435 struct loop
*loop
= data
->current_loop
;
1436 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1439 for (i
= 0; i
< loop
->num_nodes
; i
++)
1440 find_givs_in_bb (data
, body
[i
]);
1444 /* For each ssa name defined in LOOP determines whether it is an induction
1445 variable and if so, its initial value and step. */
1448 find_induction_variables (struct ivopts_data
*data
)
1453 if (!find_bivs (data
))
1459 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1461 struct tree_niter_desc
*niter
= niter_for_single_dom_exit (data
);
1465 fprintf (dump_file
, " number of iterations ");
1466 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1467 if (!integer_zerop (niter
->may_be_zero
))
1469 fprintf (dump_file
, "; zero if ");
1470 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1472 fprintf (dump_file
, "\n");
1475 fprintf (dump_file
, "\n<Induction Vars>:\n");
1476 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1478 struct version_info
*info
= ver_info (data
, i
);
1479 if (info
->iv
&& info
->iv
->step
&& !integer_zerop (info
->iv
->step
))
1480 dump_iv (dump_file
, ver_info (data
, i
)->iv
, true, 0);
1487 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1488 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1489 is the const offset stripped from IV base; for other types use, both
1490 are zero by default. */
1492 static struct iv_use
*
1493 record_use (struct iv_group
*group
, tree
*use_p
, struct iv
*iv
,
1494 gimple
*stmt
, enum use_type type
, tree addr_base
,
1495 unsigned HOST_WIDE_INT addr_offset
)
1497 struct iv_use
*use
= XCNEW (struct iv_use
);
1499 use
->id
= group
->vuses
.length ();
1500 use
->group_id
= group
->id
;
1505 use
->addr_base
= addr_base
;
1506 use
->addr_offset
= addr_offset
;
1508 group
->vuses
.safe_push (use
);
1512 /* Checks whether OP is a loop-level invariant and if so, records it.
1513 NONLINEAR_USE is true if the invariant is used in a way we do not
1514 handle specially. */
1517 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1520 struct version_info
*info
;
1522 if (TREE_CODE (op
) != SSA_NAME
1523 || virtual_operand_p (op
))
1526 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1528 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1531 info
= name_info (data
, op
);
1533 info
->has_nonlin_use
|= nonlinear_use
;
1535 info
->inv_id
= ++data
->max_inv_id
;
1536 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1540 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
);
1542 /* Record a group of TYPE. */
1544 static struct iv_group
*
1545 record_group (struct ivopts_data
*data
, enum use_type type
)
1547 struct iv_group
*group
= XCNEW (struct iv_group
);
1549 group
->id
= data
->vgroups
.length ();
1551 group
->related_cands
= BITMAP_ALLOC (NULL
);
1552 group
->vuses
.create (1);
1554 data
->vgroups
.safe_push (group
);
1558 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1559 New group will be created if there is no existing group for the use. */
1561 static struct iv_use
*
1562 record_group_use (struct ivopts_data
*data
, tree
*use_p
,
1563 struct iv
*iv
, gimple
*stmt
, enum use_type type
)
1565 tree addr_base
= NULL
;
1566 struct iv_group
*group
= NULL
;
1567 unsigned HOST_WIDE_INT addr_offset
= 0;
1569 /* Record non address type use in a new group. */
1570 if (type
== USE_ADDRESS
&& iv
->base_object
)
1574 addr_base
= strip_offset (iv
->base
, &addr_offset
);
1575 for (i
= 0; i
< data
->vgroups
.length (); i
++)
1579 group
= data
->vgroups
[i
];
1580 use
= group
->vuses
[0];
1581 if (use
->type
!= USE_ADDRESS
|| !use
->iv
->base_object
)
1584 /* Check if it has the same stripped base and step. */
1585 if (operand_equal_p (iv
->base_object
, use
->iv
->base_object
, 0)
1586 && operand_equal_p (iv
->step
, use
->iv
->step
, 0)
1587 && operand_equal_p (addr_base
, use
->addr_base
, 0))
1590 if (i
== data
->vgroups
.length ())
1595 group
= record_group (data
, type
);
1597 return record_use (group
, use_p
, iv
, stmt
, type
, addr_base
, addr_offset
);
1600 /* Checks whether the use OP is interesting and if so, records it. */
1602 static struct iv_use
*
1603 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1609 if (TREE_CODE (op
) != SSA_NAME
)
1612 iv
= get_iv (data
, op
);
1618 gcc_assert (iv
->nonlin_use
->type
== USE_NONLINEAR_EXPR
);
1619 return iv
->nonlin_use
;
1622 if (integer_zerop (iv
->step
))
1624 record_invariant (data
, op
, true);
1628 stmt
= SSA_NAME_DEF_STMT (op
);
1629 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
|| is_gimple_assign (stmt
));
1631 use
= record_group_use (data
, NULL
, iv
, stmt
, USE_NONLINEAR_EXPR
);
1632 iv
->nonlin_use
= use
;
1636 /* Given a condition in statement STMT, checks whether it is a compare
1637 of an induction variable and an invariant. If this is the case,
1638 CONTROL_VAR is set to location of the iv, BOUND to the location of
1639 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1640 induction variable descriptions, and true is returned. If this is not
1641 the case, CONTROL_VAR and BOUND are set to the arguments of the
1642 condition and false is returned. */
1645 extract_cond_operands (struct ivopts_data
*data
, gimple
*stmt
,
1646 tree
**control_var
, tree
**bound
,
1647 struct iv
**iv_var
, struct iv
**iv_bound
)
1649 /* The objects returned when COND has constant operands. */
1650 static struct iv const_iv
;
1652 tree
*op0
= &zero
, *op1
= &zero
;
1653 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
;
1656 if (gimple_code (stmt
) == GIMPLE_COND
)
1658 gcond
*cond_stmt
= as_a
<gcond
*> (stmt
);
1659 op0
= gimple_cond_lhs_ptr (cond_stmt
);
1660 op1
= gimple_cond_rhs_ptr (cond_stmt
);
1664 op0
= gimple_assign_rhs1_ptr (stmt
);
1665 op1
= gimple_assign_rhs2_ptr (stmt
);
1668 zero
= integer_zero_node
;
1669 const_iv
.step
= integer_zero_node
;
1671 if (TREE_CODE (*op0
) == SSA_NAME
)
1672 iv0
= get_iv (data
, *op0
);
1673 if (TREE_CODE (*op1
) == SSA_NAME
)
1674 iv1
= get_iv (data
, *op1
);
1676 /* Exactly one of the compared values must be an iv, and the other one must
1681 if (integer_zerop (iv0
->step
))
1683 /* Control variable may be on the other side. */
1684 std::swap (op0
, op1
);
1685 std::swap (iv0
, iv1
);
1687 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
1702 /* Checks whether the condition in STMT is interesting and if so,
1706 find_interesting_uses_cond (struct ivopts_data
*data
, gimple
*stmt
)
1708 tree
*var_p
, *bound_p
;
1711 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
1713 find_interesting_uses_op (data
, *var_p
);
1714 find_interesting_uses_op (data
, *bound_p
);
1718 record_group_use (data
, NULL
, var_iv
, stmt
, USE_COMPARE
);
1721 /* Returns the outermost loop EXPR is obviously invariant in
1722 relative to the loop LOOP, i.e. if all its operands are defined
1723 outside of the returned loop. Returns NULL if EXPR is not
1724 even obviously invariant in LOOP. */
1727 outermost_invariant_loop_for_expr (struct loop
*loop
, tree expr
)
1732 if (is_gimple_min_invariant (expr
))
1733 return current_loops
->tree_root
;
1735 if (TREE_CODE (expr
) == SSA_NAME
)
1737 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1740 if (flow_bb_inside_loop_p (loop
, def_bb
))
1742 return superloop_at_depth (loop
,
1743 loop_depth (def_bb
->loop_father
) + 1);
1746 return current_loops
->tree_root
;
1752 unsigned maxdepth
= 0;
1753 len
= TREE_OPERAND_LENGTH (expr
);
1754 for (i
= 0; i
< len
; i
++)
1756 struct loop
*ivloop
;
1757 if (!TREE_OPERAND (expr
, i
))
1760 ivloop
= outermost_invariant_loop_for_expr (loop
, TREE_OPERAND (expr
, i
));
1763 maxdepth
= MAX (maxdepth
, loop_depth (ivloop
));
1766 return superloop_at_depth (loop
, maxdepth
);
1769 /* Returns true if expression EXPR is obviously invariant in LOOP,
1770 i.e. if all its operands are defined outside of the LOOP. LOOP
1771 should not be the function body. */
1774 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1779 gcc_assert (loop_depth (loop
) > 0);
1781 if (is_gimple_min_invariant (expr
))
1784 if (TREE_CODE (expr
) == SSA_NAME
)
1786 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1788 && flow_bb_inside_loop_p (loop
, def_bb
))
1797 len
= TREE_OPERAND_LENGTH (expr
);
1798 for (i
= 0; i
< len
; i
++)
1799 if (TREE_OPERAND (expr
, i
)
1800 && !expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1806 /* Given expression EXPR which computes inductive values with respect
1807 to loop recorded in DATA, this function returns biv from which EXPR
1808 is derived by tracing definition chains of ssa variables in EXPR. */
1811 find_deriving_biv_for_expr (struct ivopts_data
*data
, tree expr
)
1816 enum tree_code code
;
1819 if (expr
== NULL_TREE
)
1822 if (is_gimple_min_invariant (expr
))
1825 code
= TREE_CODE (expr
);
1826 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code
)))
1828 n
= TREE_OPERAND_LENGTH (expr
);
1829 for (i
= 0; i
< n
; i
++)
1831 iv
= find_deriving_biv_for_expr (data
, TREE_OPERAND (expr
, i
));
1837 /* Stop if it's not ssa name. */
1838 if (code
!= SSA_NAME
)
1841 iv
= get_iv (data
, expr
);
1842 if (!iv
|| integer_zerop (iv
->step
))
1847 stmt
= SSA_NAME_DEF_STMT (expr
);
1848 if (gphi
*phi
= dyn_cast
<gphi
*> (stmt
))
1851 use_operand_p use_p
;
1853 if (virtual_operand_p (gimple_phi_result (phi
)))
1856 FOR_EACH_PHI_ARG (use_p
, phi
, iter
, SSA_OP_USE
)
1858 tree use
= USE_FROM_PTR (use_p
);
1859 iv
= find_deriving_biv_for_expr (data
, use
);
1865 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1868 e1
= gimple_assign_rhs1 (stmt
);
1869 code
= gimple_assign_rhs_code (stmt
);
1870 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
)
1871 return find_deriving_biv_for_expr (data
, e1
);
1878 case POINTER_PLUS_EXPR
:
1879 /* Increments, decrements and multiplications by a constant
1881 e2
= gimple_assign_rhs2 (stmt
);
1882 iv
= find_deriving_biv_for_expr (data
, e2
);
1888 /* Casts are simple. */
1889 return find_deriving_biv_for_expr (data
, e1
);
1898 /* Record BIV, its predecessor and successor that they are used in
1899 address type uses. */
1902 record_biv_for_address_use (struct ivopts_data
*data
, struct iv
*biv
)
1905 tree type
, base_1
, base_2
;
1908 if (!biv
|| !biv
->biv_p
|| integer_zerop (biv
->step
)
1909 || biv
->have_address_use
|| !biv
->no_overflow
)
1912 type
= TREE_TYPE (biv
->base
);
1913 if (!INTEGRAL_TYPE_P (type
))
1916 biv
->have_address_use
= true;
1917 data
->bivs_not_used_in_addr
--;
1918 base_1
= fold_build2 (PLUS_EXPR
, type
, biv
->base
, biv
->step
);
1919 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1921 struct iv
*iv
= ver_info (data
, i
)->iv
;
1923 if (!iv
|| !iv
->biv_p
|| integer_zerop (iv
->step
)
1924 || iv
->have_address_use
|| !iv
->no_overflow
)
1927 if (type
!= TREE_TYPE (iv
->base
)
1928 || !INTEGRAL_TYPE_P (TREE_TYPE (iv
->base
)))
1931 if (!operand_equal_p (biv
->step
, iv
->step
, 0))
1934 base_2
= fold_build2 (PLUS_EXPR
, type
, iv
->base
, iv
->step
);
1935 if (operand_equal_p (base_1
, iv
->base
, 0)
1936 || operand_equal_p (base_2
, biv
->base
, 0))
1938 iv
->have_address_use
= true;
1939 data
->bivs_not_used_in_addr
--;
1944 /* Cumulates the steps of indices into DATA and replaces their values with the
1945 initial ones. Returns false when the value of the index cannot be determined.
1946 Callback for for_each_index. */
1948 struct ifs_ivopts_data
1950 struct ivopts_data
*ivopts_data
;
1956 idx_find_step (tree base
, tree
*idx
, void *data
)
1958 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1960 bool use_overflow_semantics
= false;
1961 tree step
, iv_base
, iv_step
, lbound
, off
;
1962 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1964 /* If base is a component ref, require that the offset of the reference
1966 if (TREE_CODE (base
) == COMPONENT_REF
)
1968 off
= component_ref_field_offset (base
);
1969 return expr_invariant_in_loop_p (loop
, off
);
1972 /* If base is array, first check whether we will be able to move the
1973 reference out of the loop (in order to take its address in strength
1974 reduction). In order for this to work we need both lower bound
1975 and step to be loop invariants. */
1976 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1978 /* Moreover, for a range, the size needs to be invariant as well. */
1979 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1980 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1983 step
= array_ref_element_size (base
);
1984 lbound
= array_ref_low_bound (base
);
1986 if (!expr_invariant_in_loop_p (loop
, step
)
1987 || !expr_invariant_in_loop_p (loop
, lbound
))
1991 if (TREE_CODE (*idx
) != SSA_NAME
)
1994 iv
= get_iv (dta
->ivopts_data
, *idx
);
1998 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1999 *&x[0], which is not folded and does not trigger the
2000 ARRAY_REF path below. */
2003 if (integer_zerop (iv
->step
))
2006 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
2008 step
= array_ref_element_size (base
);
2010 /* We only handle addresses whose step is an integer constant. */
2011 if (TREE_CODE (step
) != INTEGER_CST
)
2015 /* The step for pointer arithmetics already is 1 byte. */
2016 step
= size_one_node
;
2020 if (iv
->no_overflow
&& nowrap_type_p (TREE_TYPE (iv_step
)))
2021 use_overflow_semantics
= true;
2023 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
2024 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
2025 use_overflow_semantics
))
2027 /* The index might wrap. */
2031 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
2032 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
2034 if (dta
->ivopts_data
->bivs_not_used_in_addr
)
2037 iv
= find_deriving_biv_for_expr (dta
->ivopts_data
, iv
->ssa_name
);
2039 record_biv_for_address_use (dta
->ivopts_data
, iv
);
2044 /* Records use in index IDX. Callback for for_each_index. Ivopts data
2045 object is passed to it in DATA. */
2048 idx_record_use (tree base
, tree
*idx
,
2051 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
2052 find_interesting_uses_op (data
, *idx
);
2053 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
2055 find_interesting_uses_op (data
, array_ref_element_size (base
));
2056 find_interesting_uses_op (data
, array_ref_low_bound (base
));
2061 /* If we can prove that TOP = cst * BOT for some constant cst,
2062 store cst to MUL and return true. Otherwise return false.
2063 The returned value is always sign-extended, regardless of the
2064 signedness of TOP and BOT. */
2067 constant_multiple_of (tree top
, tree bot
, widest_int
*mul
)
2070 enum tree_code code
;
2071 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
2072 widest_int res
, p0
, p1
;
2077 if (operand_equal_p (top
, bot
, 0))
2083 code
= TREE_CODE (top
);
2087 mby
= TREE_OPERAND (top
, 1);
2088 if (TREE_CODE (mby
) != INTEGER_CST
)
2091 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
2094 *mul
= wi::sext (res
* wi::to_widest (mby
), precision
);
2099 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
2100 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
2103 if (code
== MINUS_EXPR
)
2105 *mul
= wi::sext (p0
+ p1
, precision
);
2109 if (TREE_CODE (bot
) != INTEGER_CST
)
2112 p0
= widest_int::from (top
, SIGNED
);
2113 p1
= widest_int::from (bot
, SIGNED
);
2116 *mul
= wi::sext (wi::divmod_trunc (p0
, p1
, SIGNED
, &res
), precision
);
2124 /* Return true if memory reference REF with step STEP may be unaligned. */
2127 may_be_unaligned_p (tree ref
, tree step
)
2129 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2130 thus they are not misaligned. */
2131 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
2134 unsigned int align
= TYPE_ALIGN (TREE_TYPE (ref
));
2135 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
))) > align
)
2136 align
= GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
)));
2138 unsigned HOST_WIDE_INT bitpos
;
2139 unsigned int ref_align
;
2140 get_object_alignment_1 (ref
, &ref_align
, &bitpos
);
2141 if (ref_align
< align
2142 || (bitpos
% align
) != 0
2143 || (bitpos
% BITS_PER_UNIT
) != 0)
2146 unsigned int trailing_zeros
= tree_ctz (step
);
2147 if (trailing_zeros
< HOST_BITS_PER_INT
2148 && (1U << trailing_zeros
) * BITS_PER_UNIT
< align
)
2154 /* Return true if EXPR may be non-addressable. */
2157 may_be_nonaddressable_p (tree expr
)
2159 switch (TREE_CODE (expr
))
2161 case TARGET_MEM_REF
:
2162 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2163 target, thus they are always addressable. */
2167 /* Likewise for MEM_REFs, modulo the storage order. */
2168 return REF_REVERSE_STORAGE_ORDER (expr
);
2171 if (REF_REVERSE_STORAGE_ORDER (expr
))
2173 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
2176 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr
, 0))))
2178 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
2179 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
2182 case ARRAY_RANGE_REF
:
2183 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr
, 0))))
2185 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
2187 case VIEW_CONVERT_EXPR
:
2188 /* This kind of view-conversions may wrap non-addressable objects
2189 and make them look addressable. After some processing the
2190 non-addressability may be uncovered again, causing ADDR_EXPRs
2191 of inappropriate objects to be built. */
2192 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
2193 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
2195 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
2207 /* Finds addresses in *OP_P inside STMT. */
2210 find_interesting_uses_address (struct ivopts_data
*data
, gimple
*stmt
,
2213 tree base
= *op_p
, step
= size_zero_node
;
2215 struct ifs_ivopts_data ifs_ivopts_data
;
2217 /* Do not play with volatile memory references. A bit too conservative,
2218 perhaps, but safe. */
2219 if (gimple_has_volatile_ops (stmt
))
2222 /* Ignore bitfields for now. Not really something terribly complicated
2224 if (TREE_CODE (base
) == BIT_FIELD_REF
)
2227 base
= unshare_expr (base
);
2229 if (TREE_CODE (base
) == TARGET_MEM_REF
)
2231 tree type
= build_pointer_type (TREE_TYPE (base
));
2235 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
2237 civ
= get_iv (data
, TMR_BASE (base
));
2241 TMR_BASE (base
) = civ
->base
;
2244 if (TMR_INDEX2 (base
)
2245 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
2247 civ
= get_iv (data
, TMR_INDEX2 (base
));
2251 TMR_INDEX2 (base
) = civ
->base
;
2254 if (TMR_INDEX (base
)
2255 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
2257 civ
= get_iv (data
, TMR_INDEX (base
));
2261 TMR_INDEX (base
) = civ
->base
;
2266 if (TMR_STEP (base
))
2267 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
2269 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
2273 if (integer_zerop (step
))
2275 base
= tree_mem_ref_addr (type
, base
);
2279 ifs_ivopts_data
.ivopts_data
= data
;
2280 ifs_ivopts_data
.stmt
= stmt
;
2281 ifs_ivopts_data
.step
= size_zero_node
;
2282 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
2283 || integer_zerop (ifs_ivopts_data
.step
))
2285 step
= ifs_ivopts_data
.step
;
2287 /* Check that the base expression is addressable. This needs
2288 to be done after substituting bases of IVs into it. */
2289 if (may_be_nonaddressable_p (base
))
2292 /* Moreover, on strict alignment platforms, check that it is
2293 sufficiently aligned. */
2294 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
2297 base
= build_fold_addr_expr (base
);
2299 /* Substituting bases of IVs into the base expression might
2300 have caused folding opportunities. */
2301 if (TREE_CODE (base
) == ADDR_EXPR
)
2303 tree
*ref
= &TREE_OPERAND (base
, 0);
2304 while (handled_component_p (*ref
))
2305 ref
= &TREE_OPERAND (*ref
, 0);
2306 if (TREE_CODE (*ref
) == MEM_REF
)
2308 tree tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
2309 TREE_OPERAND (*ref
, 0),
2310 TREE_OPERAND (*ref
, 1));
2317 civ
= alloc_iv (data
, base
, step
);
2318 record_group_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
2322 for_each_index (op_p
, idx_record_use
, data
);
2325 /* Finds and records invariants used in STMT. */
2328 find_invariants_stmt (struct ivopts_data
*data
, gimple
*stmt
)
2331 use_operand_p use_p
;
2334 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2336 op
= USE_FROM_PTR (use_p
);
2337 record_invariant (data
, op
, false);
2341 /* Finds interesting uses of induction variables in the statement STMT. */
2344 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple
*stmt
)
2347 tree op
, *lhs
, *rhs
;
2349 use_operand_p use_p
;
2350 enum tree_code code
;
2352 find_invariants_stmt (data
, stmt
);
2354 if (gimple_code (stmt
) == GIMPLE_COND
)
2356 find_interesting_uses_cond (data
, stmt
);
2360 if (is_gimple_assign (stmt
))
2362 lhs
= gimple_assign_lhs_ptr (stmt
);
2363 rhs
= gimple_assign_rhs1_ptr (stmt
);
2365 if (TREE_CODE (*lhs
) == SSA_NAME
)
2367 /* If the statement defines an induction variable, the uses are not
2368 interesting by themselves. */
2370 iv
= get_iv (data
, *lhs
);
2372 if (iv
&& !integer_zerop (iv
->step
))
2376 code
= gimple_assign_rhs_code (stmt
);
2377 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
2378 && (REFERENCE_CLASS_P (*rhs
)
2379 || is_gimple_val (*rhs
)))
2381 if (REFERENCE_CLASS_P (*rhs
))
2382 find_interesting_uses_address (data
, stmt
, rhs
);
2384 find_interesting_uses_op (data
, *rhs
);
2386 if (REFERENCE_CLASS_P (*lhs
))
2387 find_interesting_uses_address (data
, stmt
, lhs
);
2390 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
2392 find_interesting_uses_cond (data
, stmt
);
2396 /* TODO -- we should also handle address uses of type
2398 memory = call (whatever);
2405 if (gimple_code (stmt
) == GIMPLE_PHI
2406 && gimple_bb (stmt
) == data
->current_loop
->header
)
2408 iv
= get_iv (data
, PHI_RESULT (stmt
));
2410 if (iv
&& !integer_zerop (iv
->step
))
2414 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2416 op
= USE_FROM_PTR (use_p
);
2418 if (TREE_CODE (op
) != SSA_NAME
)
2421 iv
= get_iv (data
, op
);
2425 find_interesting_uses_op (data
, op
);
2429 /* Finds interesting uses of induction variables outside of loops
2430 on loop exit edge EXIT. */
2433 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
2439 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
2442 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
2443 if (!virtual_operand_p (def
))
2444 find_interesting_uses_op (data
, def
);
/* Compute maximum offset of [base + offset] addressing mode
   for memory reference represented by USE.  */

static HOST_WIDE_INT
compute_max_addr_offset (struct iv_use *use)
{
  int width;
  rtx reg, addr;
  HOST_WIDE_INT i, off;
  unsigned list_index, num;
  addr_space_t as;
  machine_mode mem_mode, addr_mode;
  static vec<HOST_WIDE_INT> max_offset_list;

  as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));

  num = max_offset_list.length ();
  list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
  if (list_index >= num)
    {
      max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
      for (; num < max_offset_list.length (); num++)
	max_offset_list[num] = -1;
    }

  off = max_offset_list[list_index];
  if (off != -1)
    return off;

  addr_mode = targetm.addr_space.address_mode (as);
  reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
  addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);

  width = GET_MODE_BITSIZE (addr_mode) - 1;
  if (width > (HOST_BITS_PER_WIDE_INT - 1))
    width = HOST_BITS_PER_WIDE_INT - 1;

  for (i = width; i > 0; i--)
    {
      off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
      XEXP (addr, 1) = gen_int_mode (off, addr_mode);
      if (memory_address_addr_space_p (mem_mode, addr, as))
	break;

      /* For some strict-alignment targets, the offset must be naturally
	 aligned.  Try an aligned offset if mem_mode is not QImode.  */
      off = ((unsigned HOST_WIDE_INT) 1 << i);
      if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
	{
	  off -= GET_MODE_SIZE (mem_mode);
	  XEXP (addr, 1) = gen_int_mode (off, addr_mode);
	  if (memory_address_addr_space_p (mem_mode, addr, as))
	    break;
	}
    }
  if (i == 0)
    off = 0;

  max_offset_list[list_index] = off;
  return off;
}
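
/* Expository note (added; not in the original sources): the probing loop
   above asks the backend, through memory_address_addr_space_p, for the
   largest power-of-two-minus-one displacement it still accepts in a
   [base + offset] address.  On a target with a signed 12-bit displacement
   field this would settle on 2047 for word accesses; the exact value is
   target- and mode-dependent and is cached in MAX_OFFSET_LIST per address
   space and access mode.  */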
/* Comparison function to sort group in ascending order of addr_offset.  */

static int
group_compare_offset (const void *a, const void *b)
{
  const struct iv_use *const *u1 = (const struct iv_use *const *) a;
  const struct iv_use *const *u2 = (const struct iv_use *const *) b;

  if ((*u1)->addr_offset != (*u2)->addr_offset)
    return (*u1)->addr_offset < (*u2)->addr_offset ? -1 : 1;
  else
    return 0;
}
/* Check if small groups should be split.  Return true if no group
   contains more than two uses with distinct addr_offsets.  Return
   false otherwise.  We want to split such groups because:

     1) Small groups don't have much benefit and may interfere with
	general candidate selection.
     2) Size for problem with only small groups is usually small and
	general algorithm can handle it well.

   TODO -- Above claim may not hold when we want to merge memory
   accesses with consecutive addresses.  */

static bool
split_small_address_groups_p (struct ivopts_data *data)
{
  unsigned int i, j, distinct = 1;
  struct iv_use *pre;
  struct iv_group *group;

  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];
      if (group->vuses.length () == 1)
	continue;

      gcc_assert (group->type == USE_ADDRESS);
      if (group->vuses.length () == 2)
	{
	  if (group->vuses[0]->addr_offset > group->vuses[1]->addr_offset)
	    std::swap (group->vuses[0], group->vuses[1]);
	}
      else
	group->vuses.qsort (group_compare_offset);

      if (distinct > 2)
	continue;

      distinct = 1;
      for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
	{
	  if (group->vuses[j]->addr_offset != pre->addr_offset)
	    {
	      pre = group->vuses[j];
	      distinct++;
	    }

	  if (distinct > 2)
	    break;
	}
    }

  return (distinct <= 2);
}
/* For each group of address type uses, this function further groups
   these uses according to the maximum offset supported by target's
   [base + offset] addressing mode.  */

static void
split_address_groups (struct ivopts_data *data)
{
  unsigned int i, j;
  HOST_WIDE_INT max_offset = -1;

  /* Reset max offset to split all small groups.  */
  if (split_small_address_groups_p (data))
    max_offset = 0;

  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];
      struct iv_use *use = group->vuses[0];

      use->id = 0;
      use->group_id = group->id;
      if (group->vuses.length () == 1)
	continue;

      if (max_offset != 0)
	max_offset = compute_max_addr_offset (use);

      for (j = 1; j < group->vuses.length (); j++)
	{
	  struct iv_use *next = group->vuses[j];

	  /* Only uses with offset that can fit in offset part against
	     the first use can be grouped together.  */
	  if (next->addr_offset - use->addr_offset
	      > (unsigned HOST_WIDE_INT) max_offset)
	    break;

	  next->id = j;
	  next->group_id = group->id;
	}
      /* Split the group.  */
      if (j < group->vuses.length ())
	{
	  struct iv_group *new_group = record_group (data, group->type);
	  new_group->vuses.safe_splice (group->vuses);
	  new_group->vuses.block_remove (0, j);
	  group->vuses.truncate (j);
	}
    }
}
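
/* Illustrative example (added for exposition; the cut-off is target
   dependent):

     for (i = 0; i < n; i++)
       {
	 a[i] = 0;
	 a[i + 1] = 0;
	 a[i + 100000] = 0;
       }

   All three stores share the iv base &a[i] and differ only in the constant
   offsets 0, 4 and 400000 (with 4-byte elements).  The first two normally
   stay in one group because 4 fits in the [base + offset] range computed by
   compute_max_addr_offset, while the third is moved into a new group when
   400000 does not.  */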
/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  edge e;

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;

      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  split_address_groups (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n<Invariant Vars>:\n");
      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  struct version_info *info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "Inv %d:\t", info->inv_id);
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, "%s\n",
		       info->has_nonlin_use ? "" : "\t(eliminable)");
	    }
	}

      fprintf (dump_file, "\n<IV Groups>:\n");
      dump_groups (dump_file, data);
      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      {
	tree field;

	if (!inside_addr)
	  return orig_expr;

	tmp = component_ref_field_offset (expr);
	field = TREE_OPERAND (expr, 1);
	if (top_compref
	    && cst_and_fits_in_hwi (tmp)
	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
	  {
	    HOST_WIDE_INT boffset, abs_off;

	    /* Strip the component reference completely.  */
	    op0 = TREE_OPERAND (expr, 0);
	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
	    if (boffset < 0)
	      abs_off = -abs_off;

	    *offset = off0 + int_cst_value (tmp) + abs_off;
	    return op0;
	  }
      }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  HOST_WIDE_INT off;
  tree core = strip_offset_1 (expr, false, false, &off);
  *offset = off;
  return core;
}
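
/* Worked example (illustrative only): for EXPR == &a[4] with 4-byte array
   elements, strip_offset stores 16 to *OFFSET and returns the stripped base
   address of the array (converted back to the original pointer type); for
   EXPR == p + 8 it stores 8 and returns p.  Bases that differ only in such
   constant offsets can then be recognized as one candidate.  */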
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;

static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple *incremented_at,
		 struct iv *orig_iv = NULL)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  gcc_assert (base && step);

  /* -fkeep-gc-roots-live means that we have to keep a real pointer
     live, but the ivopts code may replace a real pointer with one
     pointing before or after the memory block that is then adjusted
     into the memory block during the loop.  FIXME: It would likely be
     better to actually force the pointer live and still use ivopts;
     for example, it would be enough to write the pointer into memory
     and keep it there until after the loop.  */
  if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL;

  /* For non-original variables, make sure their values are computed in a type
     that does not invoke undefined behavior on overflows (since in general,
     we cannot prove that these induction variables are non-wrapping).  */
  if (pos != IP_ORIGINAL)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  for (i = 0; i < data->vcands.length (); i++)
    {
      cand = data->vcands[i];

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at
	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	      && cand->ainc_use != use))
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (base))
	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
	break;
    }

  if (i == data->vcands.length ())
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;
      cand->iv = alloc_iv (data, base, step);
      cand->pos = pos;
      if (pos != IP_ORIGINAL)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      data->vcands.safe_push (cand);

      if (TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	cand->ainc_use = use;
      else
	cand->ainc_use = NULL;

      cand->orig_iv = orig_iv;
      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  cand->important |= important;

  /* Relate candidate to the group for which it is added.  */
  if (use)
    bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);

  return cand;
}
/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}
/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
   Important field is set to IMPORTANT.  */

static void
add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
			bool important, struct iv_use *use)
{
  basic_block use_bb = gimple_bb (use->stmt);
  machine_mode mem_mode;
  unsigned HOST_WIDE_INT cstepi;

  /* If we insert the increment in any position other than the standard
     ones, we must ensure that it is incremented once per iteration.
     It must not be in an inner nested loop, or one side of an if
     statement.  */
  if (use_bb->loop_father != data->current_loop
      || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
      || stmt_could_throw_p (use->stmt)
      || !cst_and_fits_in_hwi (step))
    return;

  cstepi = int_cst_value (step);

  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
  if (((USE_LOAD_PRE_INCREMENT (mem_mode)
	|| USE_STORE_PRE_INCREMENT (mem_mode))
       && GET_MODE_SIZE (mem_mode) == cstepi)
      || ((USE_LOAD_PRE_DECREMENT (mem_mode)
	   || USE_STORE_PRE_DECREMENT (mem_mode))
	  && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      enum tree_code code = MINUS_EXPR;
      tree new_base;
      tree new_step = step;

      if (POINTER_TYPE_P (TREE_TYPE (base)))
	{
	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
	  code = POINTER_PLUS_EXPR;
	}
      else
	new_step = fold_convert (TREE_TYPE (base), new_step);
      new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
      add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
		       use->stmt);
    }
  if (((USE_LOAD_POST_INCREMENT (mem_mode)
	|| USE_STORE_POST_INCREMENT (mem_mode))
       && GET_MODE_SIZE (mem_mode) == cstepi)
      || ((USE_LOAD_POST_DECREMENT (mem_mode)
	   || USE_STORE_POST_DECREMENT (mem_mode))
	  && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
		       use->stmt);
    }
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled before exit condition and at
   the end of loop.  */

static void
add_candidate (struct ivopts_data *data,
	       tree base, tree step, bool important, struct iv_use *use,
	       struct iv *orig_iv = NULL)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important,
		     IP_NORMAL, use, NULL, orig_iv);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
}
/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_candidate (data, integer_zero_node, integer_one_node, true, NULL);

  /* The same for a double-integer type if it is still fast enough.  */
  if (TYPE_PRECISION
	(long_integer_type_node) > TYPE_PRECISION (integer_type_node)
      && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_integer_type_node, 0),
		   build_int_cst (long_integer_type_node, 1), true, NULL);

  /* The same for a double-integer type if it is still fast enough.  */
  if (TYPE_PRECISION
	(long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
      && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
		   build_int_cst (long_long_integer_type_node, 1), true, NULL);
}
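
/* In effect (expository note), independently of the uses found in the loop,
   the canonical counter

     for (i = 0; ...; i++)

   is always available as a candidate: {0, +, 1} in int, and also in long
   and long long as long as those types are no wider than a machine word.  */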
/* Adds candidates based on the old induction variable IV.  */

static void
add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
{
  gimple *phi;
  tree def;
  struct iv_cand *cand;

  /* Check if this biv is used in address type use.  */
  if (iv->no_overflow && iv->have_address_use
      && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
      && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
    {
      tree base = fold_convert (sizetype, iv->base);
      tree step = fold_convert (sizetype, iv->step);

      /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
      add_candidate (data, base, step, true, NULL, iv);
      /* Add iv cand of the original type only if it has nonlinear use.  */
      if (iv->nonlin_use)
	add_candidate (data, iv->base, iv->step, true, NULL);
    }
  else
    add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
    add_candidate (data, size_int (0), iv->step, true, NULL);
  else
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
		   iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
	 untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      /* Don't add candidate if it's from another PHI node because
	 it's an affine iv appearing in the form of PEELED_CHREC.  */
      phi = SSA_NAME_DEF_STMT (def);
      if (gimple_code (phi) != GIMPLE_PHI)
	{
	  cand = add_candidate_1 (data,
				  iv->base, iv->step, true, IP_ORIGINAL, NULL,
				  SSA_NAME_DEF_STMT (def));
	  if (cand)
	    {
	      cand->var_before = iv->ssa_name;
	      cand->var_after = def;
	    }
	}
      else
	gcc_assert (gimple_bb (phi) == data->current_loop->header);
    }
}
/* Adds candidates based on the old induction variables.  */

static void
add_iv_candidate_for_bivs (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !integer_zerop (iv->step))
	add_iv_candidate_for_biv (data, iv);
    }
}
/* Record common candidate {BASE, STEP} derived from USE in hashtable.  */

static void
record_common_cand (struct ivopts_data *data, tree base,
		    tree step, struct iv_use *use)
{
  struct iv_common_cand ent;
  struct iv_common_cand **slot;

  ent.base = base;
  ent.step = step;
  ent.hash = iterative_hash_expr (base, 0);
  ent.hash = iterative_hash_expr (step, ent.hash);

  slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
  if (*slot == NULL)
    {
      *slot = new iv_common_cand ();
      (*slot)->base = base;
      (*slot)->step = step;
      (*slot)->uses.create (8);
      (*slot)->hash = ent.hash;
      data->iv_common_cands.safe_push ((*slot));
    }

  gcc_assert (use != NULL);
  (*slot)->uses.safe_push (use);
}
/* Comparison function used to sort common candidates.  */

static int
common_cand_cmp (const void *p1, const void *p2)
{
  unsigned n1, n2;
  const struct iv_common_cand *const *const ccand1
    = (const struct iv_common_cand *const *)p1;
  const struct iv_common_cand *const *const ccand2
    = (const struct iv_common_cand *const *)p2;

  n1 = (*ccand1)->uses.length ();
  n2 = (*ccand2)->uses.length ();
  return n2 - n1;
}
/* Adds IV candidates based on the common candidates recorded.  */

static void
add_iv_candidate_derived_from_uses (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_cand *cand_1, *cand_2;

  data->iv_common_cands.qsort (common_cand_cmp);
  for (i = 0; i < data->iv_common_cands.length (); i++)
    {
      struct iv_common_cand *ptr = data->iv_common_cands[i];

      /* Only add IV candidate if it's derived from multiple uses.  */
      if (ptr->uses.length () <= 1)
	break;

      cand_1 = NULL;
      cand_2 = NULL;
      if (ip_normal_pos (data->current_loop))
	cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
				  false, IP_NORMAL, NULL, NULL);

      if (ip_end_pos (data->current_loop)
	  && allow_ip_end_pos_p (data->current_loop))
	cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
				  false, IP_END, NULL, NULL);

      /* Bind deriving uses and the new candidates.  */
      for (j = 0; j < ptr->uses.length (); j++)
	{
	  struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
	  if (cand_1)
	    bitmap_set_bit (group->related_cands, cand_1->id);
	  if (cand_2)
	    bitmap_set_bit (group->related_cands, cand_2->id);
	}
    }

  /* Release data since it is useless from this point.  */
  data->iv_common_cand_tab->empty ();
  data->iv_common_cands.truncate (0);
}
/* Adds candidates based on the value of USE's iv.  */

static void
add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;
  struct iv *iv = use->iv;

  add_candidate (data, iv->base, iv->step, false, use);

  /* Record common candidate for use in case it can be shared by others.  */
  record_common_cand (data, iv->base, iv->step, use);

  /* Record common candidate with initial value zero.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);

  /* Record common candidate with constant offset stripped in base.
     Like the use itself, we also add candidate directly for it.  */
  base = strip_offset (iv->base, &offset);
  if (offset || base != iv->base)
    {
      record_common_cand (data, base, iv->step, use);
      add_candidate (data, base, iv->step, false, use);
    }

  /* Record common candidate with base_object removed in base.  */
  if (iv->base_object != NULL)
    {
      unsigned i;
      aff_tree aff_base;
      tree step, base_object = iv->base_object;

      base = iv->base;
      step = iv->step;
      STRIP_NOPS (base);
      STRIP_NOPS (step);
      STRIP_NOPS (base_object);
      tree_to_aff_combination (base, TREE_TYPE (base), &aff_base);
      for (i = 0; i < aff_base.n; i++)
	{
	  if (aff_base.elts[i].coef != 1)
	    continue;

	  if (operand_equal_p (aff_base.elts[i].val, base_object, 0))
	    break;
	}
      if (i < aff_base.n)
	{
	  aff_combination_remove_elt (&aff_base, i);
	  base = aff_combination_to_tree (&aff_base);
	  basetype = TREE_TYPE (base);
	  if (POINTER_TYPE_P (basetype))
	    basetype = sizetype;

	  step = fold_convert (basetype, step);
	  record_common_cand (data, base, step, use);
	  /* Also record common candidate with offset stripped.  */
	  base = strip_offset (base, &offset);
	  if (offset)
	    record_common_cand (data, base, step, use);
	}
    }

  /* At last, add auto-incremental candidates.  Make such variables
     important since other iv uses with same base object may be based
     on its value.  */
  if (use != NULL && use->type == USE_ADDRESS)
    add_autoinc_candidates (data, iv->base, iv->step, true, use);
}
/* Adds candidates based on the uses.  */

static void
add_iv_candidate_for_groups (struct ivopts_data *data)
{
  unsigned i;

  /* Only add candidate for the first use in group.  */
  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];

      gcc_assert (group->vuses[0] != NULL);
      add_iv_candidate_for_use (data, group->vuses[0]);
    }
  add_iv_candidate_derived_from_uses (data);
}
/* Record important candidates and add them to related_cands bitmaps.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_group *group;

  for (i = 0; i < data->vcands.length (); i++)
    {
      struct iv_cand *cand = data->vcands[i];

      if (cand->important)
	bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (data->vcands.length ()
				   <= CONSIDER_ALL_CANDIDATES_BOUND);

  /* Add important candidates to groups' related_cands bitmaps.  */
  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];
      bitmap_ior_into (group->related_cands, data->important_candidates);
    }
}
/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
   we allocate a simple list to every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s;

  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];

      if (data->consider_all_candidates)
	size = data->vcands.length ();
      else
	{
	  s = bitmap_count_bits (group->related_cands);

	  /* Round up to the power of two, so that computing the modulo
	     by it is fast.  */
	  size = s ? (1 << ceil_log2 (s)) : 1;
	}

      group->n_map_members = size;
      group->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}
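
/* Expository note: rounding S up to a power of two lets the cost map be
   probed with a bit mask instead of a division; with s == 5 we allocate
   size == 8 and a candidate hashes to

     slot = cand->id & (8 - 1);      (equivalent to cand->id % 8)

   which is exactly how set_group_iv_cost and get_group_iv_cost below index
   the open-addressed table.  */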
/* Sets cost of (GROUP, CAND) pair to COST and record that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE, and in case of iv elimination the comparison operator is COMP.  */

static void
set_group_iv_cost (struct ivopts_data *data,
		   struct iv_group *group, struct iv_cand *cand,
		   comp_cost cost, bitmap depends_on, tree value,
		   enum tree_code comp, iv_inv_expr_ent *inv_expr)
{
  unsigned i, s;

  if (cost.infinite_cost_p ())
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      group->cost_map[cand->id].cand = cand;
      group->cost_map[cand->id].cost = cost;
      group->cost_map[cand->id].depends_on = depends_on;
      group->cost_map[cand->id].value = value;
      group->cost_map[cand->id].comp = comp;
      group->cost_map[cand->id].inv_expr = inv_expr;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (group->n_map_members - 1);
  for (i = s; i < group->n_map_members; i++)
    if (!group->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!group->cost_map[i].cand)
      goto found;

  gcc_unreachable ();

found:
  group->cost_map[i].cand = cand;
  group->cost_map[i].cost = cost;
  group->cost_map[i].depends_on = depends_on;
  group->cost_map[i].value = value;
  group->cost_map[i].comp = comp;
  group->cost_map[i].inv_expr = inv_expr;
}
/* Gets cost of (GROUP, CAND) pair.  */

static struct cost_pair *
get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
		   struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = group->cost_map + cand->id;
      if (!ret->cand)
	return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (group->n_map_members - 1);
  for (i = s; i < group->n_map_members; i++)
    if (group->cost_map[i].cand == cand)
      return group->cost_map + i;
    else if (group->cost_map[i].cand == NULL)
      return NULL;
  for (i = 0; i < s; i++)
    if (group->cost_map[i].cand == cand)
      return group->cost_map + i;
    else if (group->cost_map[i].cand == NULL)
      return NULL;

  return NULL;
}
/* Produce DECL_RTL for object OBJ so it looks like it is stored in memory.  */

static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
  machine_mode address_mode = targetm.addr_space.address_mode (as);
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (address_mode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
      targetm.encode_section_info (obj, x, true);
    }
  else
    {
      x = gen_raw_REG (address_mode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
    }

  return x;
}
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
	   handled_component_p (*expr_p);
	   expr_p = &TREE_OPERAND (*expr_p, 0))
	continue;
      obj = *expr_p;
      if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
	x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      /* Defer handling of anonymous SSA_NAMEs to the expander.  */
      if (!obj)
	return NULL_TREE;
      if (!DECL_RTL_SET_P (obj))
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
	break;

      if (DECL_MODE (obj) == BLKmode)
	x = produce_memory_decl_rtl (obj, regno);
      else
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      decl_rtl_to_reset.safe_push (obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
/* Determines cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr, bool speed)
{
  rtx_insn *seq;
  rtx rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  struct cgraph_node *node = cgraph_node::get (current_function_decl);
  enum node_frequency real_frequency = node->frequency;

  node->frequency = NODE_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  default_rtl_profile ();
  node->frequency = real_frequency;

  cost = seq_cost (seq, speed);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
			  TYPE_ADDR_SPACE (type), speed);
  else if (!REG_P (rslt))
    cost += set_src_cost (rslt, TYPE_MODE (type), speed);

  return cost;
}
/* Returns variable containing the value of candidate CAND at statement AT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}
/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision that is at least as wide as the precision of TYPE, stores
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
   type of A and B.  */

static tree
determine_common_wider_type (tree *a, tree *b)
{
  tree wider_type = NULL;
  tree suba, subb;
  tree atype = TREE_TYPE (*a);

  if (CONVERT_EXPR_P (*a))
    {
      suba = TREE_OPERAND (*a, 0);
      wider_type = TREE_TYPE (suba);
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
	return atype;
    }
  else
    return atype;

  if (CONVERT_EXPR_P (*b))
    {
      subb = TREE_OPERAND (*b, 0);
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
	return atype;
    }
  else
    return atype;

  *a = suba;
  *b = subb;
  return wider_type;
}
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop,
		     struct iv_use *use, struct iv_cand *cand, gimple *at,
		     struct aff_tree *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type, var;
  tree uutype;
  aff_tree cbase_aff, var_aff;
  widest_int rat;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  var = var_at_stmt (loop, cand, at);
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
	  && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
	{
	  tree inner_base, inner_step, inner_type;
	  inner_base = TREE_OPERAND (cbase, 0);
	  if (CONVERT_EXPR_P (cstep))
	    inner_step = TREE_OPERAND (cstep, 0);
	  else
	    inner_step = cstep;

	  inner_type = TREE_TYPE (inner_base);
	  /* If candidate is added from a biv whose type is smaller than
	     ctype, we know both candidate and the biv won't overflow.
	     In this case, it's safe to skip the conversion in candidate.
	     As an example, (unsigned short)((unsigned long)A) equals to
	     (unsigned short)A, if A has a type no larger than short.  */
	  if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
	    {
	      cbase = inner_base;
	      cstep = inner_step;
	    }
	}
      cstep = fold_convert (uutype, cstep);
      cbase = fold_convert (uutype, cbase);
      var = fold_convert (uutype, var);
    }

  /* Ratio is 1 when computing the value of biv cand by itself.
     We can't rely on constant_multiple_of in this case because the
     use is created after the original biv is selected.  The call
     could fail because of inconsistent fold behavior.  See PR68021
     for more information.  */
  if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
    {
      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (use->iv->ssa_name == cand->var_after);
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
      rat = 1;
    }
  else if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetic will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (var, uutype, &var_aff);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
	cstep_common = fold_convert (common_type, cstep);
      else
	cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&cbase_aff, &cstep_aff);
    }

  aff_combination_scale (&cbase_aff, -rat);
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (aff, uutype);

  aff_combination_scale (&var_aff, rat);
  aff_combination_add (aff, &var_aff);

  return true;
}
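
/* Worked example (illustrative only): expressing a use with base UB and
   step 8 in terms of a candidate with base CB and step 4 gives rat == 2,
   so the affine combination built above is

     use = UB - 2 * CB + 2 * var;

   when the computation point is after the candidate increment, CB + 4 is
   used in place of CB.  */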
/* Return the type of USE.  */

static tree
get_use_type (struct iv_use *use)
{
  tree base_type = TREE_TYPE (use->iv->base);
  tree type;

  if (use->type == USE_ADDRESS)
    {
      /* The base_type may be a void pointer.  Create a pointer type based on
	 the mem_ref instead.  */
      type = build_pointer_type (TREE_TYPE (*use->op_p));
      gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
		  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
    }
  else
    type = base_type;

  return type;
}
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */

static tree
get_computation_at (struct loop *loop,
		    struct iv_use *use, struct iv_cand *cand, gimple *at)
{
  aff_tree aff;
  tree type = get_use_type (use);

  if (!get_computation_aff (loop, use, cand, at, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}

/* Determines the expression by which USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}
/* Adjust the cost COST for being in loop setup rather than loop body.
   If we're optimizing for space, the loop setup overhead is constant;
   if we're optimizing for speed, amortize it over the per-iteration cost.  */

static unsigned
adjust_setup_cost (struct ivopts_data *data, unsigned cost)
{
  if (cost == INFTY)
    return cost;
  else if (optimize_loop_for_speed_p (data->current_loop))
    return cost / avg_loop_niter (data->current_loop);
  else
    return cost;
}
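
/* Illustrative example: with avg_loop_niter () == 10, a setup cost of 30
   contributes only 3 to the per-iteration cost when the loop is optimized
   for speed, while when optimizing for size the full 30 is charged.  */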
/* Returns true if multiplying by RATIO is allowed in an address.  Test the
   validity for a memory reference accessing memory of mode MODE in
   address space AS.  */

bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
				 addr_space_t as)
{
#define MAX_RATIO 128
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
  static vec<sbitmap> valid_mult_list;
  sbitmap valid_mult;

  if (data_index >= valid_mult_list.length ())
    valid_mult_list.safe_grow_cleared (data_index + 1);

  valid_mult = valid_mult_list[data_index];
  if (!valid_mult)
    {
      machine_mode address_mode = targetm.addr_space.address_mode (as);
      rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
      rtx addr, scaled;
      HOST_WIDE_INT i;

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      bitmap_clear (valid_mult);
      scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
      addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	{
	  XEXP (scaled, 1) = gen_int_mode (i, address_mode);
	  if (memory_address_addr_space_p (mode, addr, as)
	      || memory_address_addr_space_p (mode, scaled, as))
	    bitmap_set_bit (valid_mult, i + MAX_RATIO);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  allowed multipliers:");
	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	    if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
	      fprintf (dump_file, " %d", (int) i);
	  fprintf (dump_file, "\n");
	  fprintf (dump_file, "\n");
	}

      valid_mult_list[data_index] = valid_mult;
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
}
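
/* Expository note (target specifics are only indicative): targets with
   scaled addressing, such as x86 with its base + index*scale form, usually
   accept ratios like 2, 4 and 8 here, whereas many RISC targets accept only
   ratio 1.  The answer is obtained purely by asking
   memory_address_addr_space_p about a synthetic (plus (mult reg1 ratio)
   reg2) address and is cached per mode and address space.  */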
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  Compute the cost for a memory reference that accesses
   a memory location of mode MEM_MODE in address space AS.

   MAY_AUTOINC is set to true if the autoincrement (increasing index by
   size of MEM_MODE / RATIO) is available.  To make this determination, we
   look at the size of the increment to be made, which is given in CSTEP.
   CSTEP may be zero if the step is unknown.
   STMT_AFTER_INC is true iff the statement we're looking at is after the
   increment of the original biv.

   TODO -- there must be some better way.  This all is quite crude.  */

enum ainc_type
{
  AINC_PRE_INC,		/* Pre increment.  */
  AINC_PRE_DEC,		/* Pre decrement.  */
  AINC_POST_INC,	/* Post increment.  */
  AINC_POST_DEC,	/* Post decrement.  */
  AINC_NONE		/* Also the number of auto increment types.  */
};

struct address_cost_data
{
  HOST_WIDE_INT min_offset, max_offset;
  unsigned costs[2][2][2][2];
  unsigned ainc_costs[AINC_NONE];
};

static comp_cost
get_address_cost (bool symbol_present, bool var_present,
		  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
		  HOST_WIDE_INT cstep, machine_mode mem_mode,
		  addr_space_t as, bool speed,
		  bool stmt_after_inc, bool *may_autoinc)
{
  machine_mode address_mode = targetm.addr_space.address_mode (as);
  static vec<address_cost_data *> address_cost_data_list;
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
  address_cost_data *data;
  static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
  static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
  unsigned cost, acost, complexity;
  enum ainc_type autoinc_type;
  bool offset_p, ratio_p, autoinc;
  HOST_WIDE_INT s_offset, autoinc_offset, msize;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (data_index >= address_cost_data_list.length ())
    address_cost_data_list.safe_grow_cleared (data_index + 1);

  data = address_cost_data_list[data_index];
  if (!data)
    {
      HOST_WIDE_INT i;
      HOST_WIDE_INT rat, off = 0;
      int old_cse_not_expected, width;
      unsigned sym_p, var_p, off_p, rat_p, add_c;
      rtx_insn *seq;
      rtx addr, base;
      rtx reg0, reg1;

      data = (address_cost_data *) xcalloc (1, sizeof (*data));

      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);

      width = GET_MODE_BITSIZE (address_mode) - 1;
      if (width > (HOST_BITS_PER_WIDE_INT - 1))
	width = HOST_BITS_PER_WIDE_INT - 1;
      addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);

      for (i = width; i >= 0; i--)
	{
	  off = -((unsigned HOST_WIDE_INT) 1 << i);
	  XEXP (addr, 1) = gen_int_mode (off, address_mode);
	  if (memory_address_addr_space_p (mem_mode, addr, as))
	    break;
	}
      data->min_offset = (i == -1? 0 : off);

      for (i = width; i >= 0; i--)
	{
	  off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
	  XEXP (addr, 1) = gen_int_mode (off, address_mode);
	  if (memory_address_addr_space_p (mem_mode, addr, as))
	    break;
	  /* For some strict-alignment targets, the offset must be naturally
	     aligned.  Try an aligned offset if mem_mode is not QImode.  */
	  off = mem_mode != QImode
		? ((unsigned HOST_WIDE_INT) 1 << i)
		    - GET_MODE_SIZE (mem_mode)
		: 0;
	  if (off > 0)
	    {
	      XEXP (addr, 1) = gen_int_mode (off, address_mode);
	      if (memory_address_addr_space_p (mem_mode, addr, as))
		break;
	    }
	}
      if (i == -1)
	off = 0;
      data->max_offset = off;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "get_address_cost:\n");
	  fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
		   GET_MODE_NAME (mem_mode),
		   data->min_offset);
	  fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
		   GET_MODE_NAME (mem_mode),
		   data->max_offset);
	}

      rat = 1;
      for (i = 2; i <= MAX_RATIO; i++)
	if (multiplier_allowed_in_address_p (i, mem_mode, as))
	  {
	    rat = i;
	    break;
	  }

      /* Compute the cost of various addressing modes.  */
      acost = 0;
      reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);

      if (USE_LOAD_PRE_DECREMENT (mem_mode)
	  || USE_STORE_PRE_DECREMENT (mem_mode))
	{
	  addr = gen_rtx_PRE_DEC (address_mode, reg0);
	  has_predec[mem_mode]
	    = memory_address_addr_space_p (mem_mode, addr, as);

	  if (has_predec[mem_mode])
	    data->ainc_costs[AINC_PRE_DEC]
	      = address_cost (addr, mem_mode, as, speed);
	}
      if (USE_LOAD_POST_DECREMENT (mem_mode)
	  || USE_STORE_POST_DECREMENT (mem_mode))
	{
	  addr = gen_rtx_POST_DEC (address_mode, reg0);
	  has_postdec[mem_mode]
	    = memory_address_addr_space_p (mem_mode, addr, as);

	  if (has_postdec[mem_mode])
	    data->ainc_costs[AINC_POST_DEC]
	      = address_cost (addr, mem_mode, as, speed);
	}
      if (USE_LOAD_PRE_INCREMENT (mem_mode)
	  || USE_STORE_PRE_DECREMENT (mem_mode))
	{
	  addr = gen_rtx_PRE_INC (address_mode, reg0);
	  has_preinc[mem_mode]
	    = memory_address_addr_space_p (mem_mode, addr, as);

	  if (has_preinc[mem_mode])
	    data->ainc_costs[AINC_PRE_INC]
	      = address_cost (addr, mem_mode, as, speed);
	}
      if (USE_LOAD_POST_INCREMENT (mem_mode)
	  || USE_STORE_POST_INCREMENT (mem_mode))
	{
	  addr = gen_rtx_POST_INC (address_mode, reg0);
	  has_postinc[mem_mode]
	    = memory_address_addr_space_p (mem_mode, addr, as);

	  if (has_postinc[mem_mode])
	    data->ainc_costs[AINC_POST_INC]
	      = address_cost (addr, mem_mode, as, speed);
	}
      for (i = 0; i < 16; i++)
	{
	  sym_p = i & 1;
	  var_p = (i >> 1) & 1;
	  off_p = (i >> 2) & 1;
	  rat_p = (i >> 3) & 1;

	  addr = reg0;
	  if (rat_p)
	    addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
				   gen_int_mode (rat, address_mode));

	  if (var_p)
	    addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);

	  if (sym_p)
	    {
	      base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
	      /* ??? We can run into trouble with some backends by presenting
		 it with symbols which haven't been properly passed through
		 targetm.encode_section_info.  By setting the local bit, we
		 enhance the probability of things working.  */
	      SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;

	      if (off_p)
		base = gen_rtx_fmt_e (CONST, address_mode,
				      gen_rtx_fmt_ee
					(PLUS, address_mode, base,
					 gen_int_mode (off, address_mode)));
	    }
	  else if (off_p)
	    base = gen_int_mode (off, address_mode);
	  else
	    base = NULL_RTX;

	  if (base)
	    addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);

	  start_sequence ();
	  /* To avoid splitting addressing modes, pretend that no cse will
	     follow.  */
	  old_cse_not_expected = cse_not_expected;
	  cse_not_expected = true;
	  addr = memory_address_addr_space (mem_mode, addr, as);
	  cse_not_expected = old_cse_not_expected;
	  seq = get_insns ();
	  end_sequence ();

	  acost = seq_cost (seq, speed);
	  acost += address_cost (addr, mem_mode, as, speed);

	  if (!acost)
	    acost = 1;
	  data->costs[sym_p][var_p][off_p][rat_p] = acost;
	}

      /* On some targets, it is quite expensive to load symbol to a register,
	 which makes addresses that contain symbols look much more expensive.
	 However, the symbol will have to be loaded in any case before the
	 loop (and quite likely we have it in register already), so it does not
	 make much sense to penalize them too heavily.  So make some final
	 tweaks for the SYMBOL_PRESENT modes:

	 If VAR_PRESENT is false, and the mode obtained by changing symbol to
	 var is cheaper, use this mode with small penalty.
	 If VAR_PRESENT is true, try whether the mode with
	 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
	 if this is the case, use it.  */
      add_c = add_cost (speed, address_mode);
      for (i = 0; i < 8; i++)
	{
	  var_p = i & 1;
	  off_p = (i >> 1) & 1;
	  rat_p = (i >> 2) & 1;

	  acost = data->costs[0][1][off_p][rat_p] + 1;
	  if (var_p)
	    acost += add_c;

	  if (acost < data->costs[1][var_p][off_p][rat_p])
	    data->costs[1][var_p][off_p][rat_p] = acost;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "<Address Costs>:\n");

	  for (i = 0; i < 16; i++)
	    {
	      sym_p = i & 1;
	      var_p = (i >> 1) & 1;
	      off_p = (i >> 2) & 1;
	      rat_p = (i >> 3) & 1;

	      fprintf (dump_file, "  ");
	      if (sym_p)
		fprintf (dump_file, "sym + ");
	      if (var_p)
		fprintf (dump_file, "var + ");
	      if (off_p)
		fprintf (dump_file, "cst + ");
	      if (rat_p)
		fprintf (dump_file, "rat * ");

	      acost = data->costs[sym_p][var_p][off_p][rat_p];
	      fprintf (dump_file, "index costs %d\n", acost);
	    }
	  if (has_predec[mem_mode] || has_postdec[mem_mode]
	      || has_preinc[mem_mode] || has_postinc[mem_mode])
	    fprintf (dump_file, "  May include autoinc/dec\n");
	  fprintf (dump_file, "\n");
	}

      address_cost_data_list[data_index] = data;
    }

  bits = GET_MODE_BITSIZE (address_mode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  autoinc = false;
  autoinc_type = AINC_NONE;
  msize = GET_MODE_SIZE (mem_mode);
  autoinc_offset = offset;
  if (stmt_after_inc)
    autoinc_offset += ratio * cstep;
  if (symbol_present || var_present || ratio != 1)
    autoinc = false;
  else
    {
      if (has_postinc[mem_mode] && autoinc_offset == 0
	  && msize == cstep)
	autoinc_type = AINC_POST_INC;
      else if (has_postdec[mem_mode] && autoinc_offset == 0
	       && msize == -cstep)
	autoinc_type = AINC_POST_DEC;
      else if (has_preinc[mem_mode] && autoinc_offset == msize
	       && msize == cstep)
	autoinc_type = AINC_PRE_INC;
      else if (has_predec[mem_mode] && autoinc_offset == -msize
	       && msize == -cstep)
	autoinc_type = AINC_PRE_DEC;

      if (autoinc_type != AINC_NONE)
	autoinc = true;
    }

  cost = 0;
  offset_p = (s_offset != 0
	      && data->min_offset <= s_offset
	      && s_offset <= data->max_offset);
  ratio_p = (ratio != 1
	     && multiplier_allowed_in_address_p (ratio, mem_mode, as));

  if (ratio != 1 && !ratio_p)
    cost += mult_by_coeff_cost (ratio, address_mode, speed);

  if (s_offset && !offset_p && !symbol_present)
    cost += add_cost (speed, address_mode);

  if (may_autoinc)
    *may_autoinc = autoinc;
  if (autoinc)
    acost = data->ainc_costs[autoinc_type];
  else
    acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
  complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
  return comp_cost (cost + acost, complexity);
}
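
/* Expository example (not from the original sources): for an access lowered
   to *(sym + 4 + 2 * idx), SYMBOL_PRESENT, OFFSET and RATIO are all set, so
   the cached cost data->costs[1][0][1][1] is used and the complexity is 3
   (one each for the symbol, the offset and the ratio), with extra cost added
   only if the ratio or the offset turns out not to be directly supported.  */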
/* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
   EXPR operand holding the shift.  COST0 and COST1 are the costs for
   calculating the operands of EXPR.  Returns true if successful, and returns
   the cost in COST.  */

static bool
get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
		   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
{
  comp_cost res;
  tree op1 = TREE_OPERAND (expr, 1);
  tree cst = TREE_OPERAND (mult, 1);
  tree multop = TREE_OPERAND (mult, 0);
  int m = exact_log2 (int_cst_value (cst));
  int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
  int as_cost, sa_cost;
  bool mult_in_op1;

  if (!(m >= 0 && m < maxm))
    return false;

  STRIP_NOPS (op1);
  mult_in_op1 = operand_equal_p (op1, mult, 0);

  as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);

  /* If the target has a cheap shift-and-add or shift-and-sub instruction,
     use that in preference to a shift insn followed by an add insn.  */
  sa_cost = (TREE_CODE (expr) != MINUS_EXPR
	     ? shiftadd_cost (speed, mode, m)
	     : (mult_in_op1
		? shiftsub1_cost (speed, mode, m)
		: shiftsub0_cost (speed, mode, m)));

  res = comp_cost (MIN (as_cost, sa_cost), 0);
  res += (mult_in_op1 ? cost0 : cost1);

  STRIP_NOPS (multop);
  if (!is_gimple_val (multop))
    res += force_expr_to_var_cost (multop, speed);

  *cost = res;
  return true;
}
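
/* Illustrative example: for EXPR of the form b + i * 4, MULT is i * 4 and
   m == 2, so the routine compares the cost of a shift by 2 followed by an
   add (as_cost) with the cost of a single shift-and-add instruction, e.g.
   an x86 lea or an AArch64 add with a shifted operand (sa_cost), and uses
   the cheaper of the two.  */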
/* Estimates cost of forcing expression EXPR into a variable.  */

static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  static bool costs_initialized = false;
  static unsigned integer_cost [2];
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  machine_mode mode;

  if (!costs_initialized)
    {
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);

      for (i = 0; i < 2; i++)
	{
	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
							     2000), i);

	  symbol_cost[i] = computation_cost (addr, i) + 1;

	  address_cost[i]
	    = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
	      fprintf (dump_file, "\n");
	    }
	}

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  if (SSA_VAR_P (expr))
    return no_cost;

  if (is_gimple_min_invariant (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
	return comp_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
	{
	  tree obj = TREE_OPERAND (expr, 0);

	  if (TREE_CODE (obj) == VAR_DECL
	      || TREE_CODE (obj) == PARM_DECL
	      || TREE_CODE (obj) == RESULT_DECL)
	    return comp_cost (symbol_cost [speed], 0);
	}

      return comp_cost (address_cost [speed], 0);
    }

  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);
      break;

    CASE_CONVERT:
    case NEGATE_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      STRIP_NOPS (op0);
      op1 = NULL_TREE;
      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return comp_cost (target_spill_cost[speed], 0);
    }

  if (op0 == NULL_TREE
      || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
    cost0 = no_cost;
  else
    cost0 = force_expr_to_var_cost (op0, speed);

  if (op1 == NULL_TREE
      || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
    cost1 = no_cost;
  else
    cost1 = force_expr_to_var_cost (op1, speed);

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      cost = comp_cost (add_cost (speed, mode), 0);
      if (TREE_CODE (expr) != NEGATE_EXPR)
	{
	  tree mult = NULL_TREE;
	  comp_cost sa_cost;
	  if (TREE_CODE (op1) == MULT_EXPR)
	    mult = op1;
	  else if (TREE_CODE (op0) == MULT_EXPR)
	    mult = op0;

	  if (mult != NULL_TREE
	      && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
	      && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
				    speed, &sa_cost))
	    return sa_cost;
	}
      break;

    CASE_CONVERT:
      {
	tree inner_mode, outer_mode;
	outer_mode = TREE_TYPE (expr);
	inner_mode = TREE_TYPE (op0);
	cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
					TYPE_MODE (inner_mode), speed), 0);
      }
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
					      mode, speed), 0);
      else if (cst_and_fits_in_hwi (op1))
	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
					      mode, speed), 0);
      else
	return comp_cost (target_spill_cost [speed], 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost += cost0;
  cost += cost1;

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  if (cost.cost > (int) target_spill_cost [speed])
    cost.cost = target_spill_cost [speed];

  return cost;
}
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
force_var_cost (struct ivopts_data *data,
		tree expr, bitmap *depends_on)
{
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  return force_expr_to_var_cost (expr, data->speed);
}
/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
split_address_cost (struct ivopts_data *data,
		    tree addr, bool *symbol_present, bool *var_present,
		    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  machine_mode mode;
  int unsignedp, reversep, volatilep;

  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &reversep, &volatilep, false);

  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || reversep
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      if (depends_on)
	walk_tree (&addr, find_depends, depends_on, NULL);

      return comp_cost (target_spill_cost[data->speed], 0);
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      *symbol_present = true;
      *var_present = false;
      return no_cost;
    }

  *symbol_present = false;
  *var_present = true;
  return no_cost;
}
/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
ptr_difference_cost (struct ivopts_data *data,
		     tree e1, tree e2, bool *symbol_present, bool *var_present,
		     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  aff_tree aff_e1, aff_e2;
  tree type;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return no_cost;
    }

  if (integer_zerop (e2))
    return split_address_cost (data, TREE_OPERAND (e1, 0),
			       symbol_present, var_present, offset, depends_on);

  *symbol_present = false;
  *var_present = true;

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, -1);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
difference_cost (struct ivopts_data *data,
		 tree e1, tree e2, bool *symbol_present, bool *var_present,
		 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;
  aff_tree aff_e1, aff_e2;
  tree type;

  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
				offset, depends_on);
  *symbol_present = false;

  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return no_cost;
    }

  *var_present = true;

  if (integer_zerop (e2))
    return force_var_cost (data, e1, depends_on);

  if (integer_zerop (e1))
    {
      comp_cost cost = force_var_cost (data, e2, depends_on);
      cost += mult_by_coeff_cost (-1, mode, data->speed);
      return cost;
    }

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, -1);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
/* Returns true if AFF1 and AFF2 are identical.  */

static bool
compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
{
  unsigned i;

  if (aff1->n != aff2->n)
    return false;

  for (i = 0; i < aff1->n; i++)
    {
      if (aff1->elts[i].coef != aff2->elts[i].coef)
        return false;

      if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
        return false;
    }
  return true;
}
/* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */

static iv_inv_expr_ent *
record_inv_expr (struct ivopts_data *data, tree expr)
{
  struct iv_inv_expr_ent ent;
  struct iv_inv_expr_ent **slot;

  ent.expr = expr;
  ent.hash = iterative_hash_expr (expr, 0);
  slot = data->inv_expr_tab->find_slot (&ent, INSERT);

  if (!*slot)
    {
      *slot = XNEW (struct iv_inv_expr_ent);
      (*slot)->expr = expr;
      (*slot)->hash = ent.hash;
      (*slot)->id = data->max_inv_expr_id++;
    }

  return *slot;
}
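/* Expressions that hash and compare equal share a single iv_inv_expr_ent, so
   every distinct loop-invariant expression gets exactly one id; the register
   pressure estimate later counts each such expression only once, however
   many uses refer to it.  */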
/* Returns the invariant expression if expression UBASE - RATIO * CBASE
   requires a new compiler generated temporary.  Returns -1 otherwise.
   ADDRESS_P is a flag indicating if the expression is for address
   computation.  */

static iv_inv_expr_ent *
get_loop_invariant_expr (struct ivopts_data *data, tree ubase,
                         tree cbase, HOST_WIDE_INT ratio,
                         bool address_p)
{
  aff_tree ubase_aff, cbase_aff;
  tree expr, ub, cb;

  STRIP_NOPS (ubase);
  STRIP_NOPS (cbase);
  ub = ubase;
  cb = cbase;

  if ((TREE_CODE (ubase) == INTEGER_CST)
      && (TREE_CODE (cbase) == INTEGER_CST))
    return NULL;

  /* Strips the constant part. */
  if (TREE_CODE (ubase) == PLUS_EXPR
      || TREE_CODE (ubase) == MINUS_EXPR
      || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
        ubase = TREE_OPERAND (ubase, 0);
    }

  /* Strips the constant part. */
  if (TREE_CODE (cbase) == PLUS_EXPR
      || TREE_CODE (cbase) == MINUS_EXPR
      || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
        cbase = TREE_OPERAND (cbase, 0);
    }

  if (address_p)
    {
      if (((TREE_CODE (ubase) == SSA_NAME)
           || (TREE_CODE (ubase) == ADDR_EXPR
               && is_gimple_min_invariant (ubase)))
          && (TREE_CODE (cbase) == INTEGER_CST))
        return NULL;

      if (((TREE_CODE (cbase) == SSA_NAME)
           || (TREE_CODE (cbase) == ADDR_EXPR
               && is_gimple_min_invariant (cbase)))
          && (TREE_CODE (ubase) == INTEGER_CST))
        return NULL;
    }

  if (ratio == 1)
    {
      if (operand_equal_p (ubase, cbase, 0))
        return NULL;

      if (TREE_CODE (ubase) == ADDR_EXPR
          && TREE_CODE (cbase) == ADDR_EXPR)
        {
          tree usym, csym;

          usym = TREE_OPERAND (ubase, 0);
          csym = TREE_OPERAND (cbase, 0);
          if (TREE_CODE (usym) == ARRAY_REF)
            {
              tree ind = TREE_OPERAND (usym, 1);
              if (TREE_CODE (ind) == INTEGER_CST
                  && tree_fits_shwi_p (ind)
                  && tree_to_shwi (ind) == 0)
                usym = TREE_OPERAND (usym, 0);
            }
          if (TREE_CODE (csym) == ARRAY_REF)
            {
              tree ind = TREE_OPERAND (csym, 1);
              if (TREE_CODE (ind) == INTEGER_CST
                  && tree_fits_shwi_p (ind)
                  && tree_to_shwi (ind) == 0)
                csym = TREE_OPERAND (csym, 0);
            }
          if (operand_equal_p (usym, csym, 0))
            return NULL;
        }
      /* Now do more complex comparison  */
      tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
      tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
      if (compare_aff_trees (&ubase_aff, &cbase_aff))
        return NULL;
    }

  tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
  tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);

  aff_combination_scale (&cbase_aff, -1 * ratio);
  aff_combination_add (&ubase_aff, &cbase_aff);
  expr = aff_combination_to_tree (&ubase_aff);
  return record_inv_expr (data, expr);
}
/* Scale (multiply) the computed COST (except scratch part that should be
   hoisted out a loop) by header->frequency / AT->frequency,
   which makes expected cost more accurate.  */

static comp_cost
get_scaled_computation_cost_at (ivopts_data *data, gimple *at, iv_cand *cand,
                                comp_cost cost)
{
  int loop_freq = data->current_loop->header->frequency;
  int bb_freq = at->bb->frequency;

  gcc_assert (cost.scratch <= cost.cost);
  int scaled_cost
    = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Scaling iv_use based on cand %d "
             "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
             cand->id, 1.0f * bb_freq / loop_freq, cost.cost,
             cost.scratch, scaled_cost, bb_freq, loop_freq);

  cost.cost = scaled_cost;

  return cost;
}
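/* For example, with cost.cost == 10, cost.scratch == 4, bb_freq == 50 and
   loop_freq == 100 the scaled cost is 4 + (10 - 4) * 50 / 100 == 7: only the
   non-setup part of the cost is scaled by the relative execution
   frequency.  */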
/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at that the value is computed.
   If CAN_AUTOINC is nonnull, use it to record whether autoinc
   addressing is likely.  */

static comp_cost
get_computation_cost_at (struct ivopts_data *data,
                         struct iv_use *use, struct iv_cand *cand,
                         bool address_p, bitmap *depends_on, gimple *at,
                         bool *can_autoinc,
                         iv_inv_expr_ent **inv_expr)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present, stmt_is_after_inc;
  comp_cost cost;
  widest_int rat;
  bool speed = optimize_bb_for_speed_p (gimple_bb (at));
  machine_mode mem_mode = (address_p
                           ? TYPE_MODE (TREE_TYPE (*use->op_p))
                           : VOIDmode);

  if (depends_on)
    *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return infinite_cost;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return infinite_cost;
    }

  if (address_p
      || (use->iv->base_object
          && cand->iv->base_object
          && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
          && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
    {
      /* Do not try to express address of an object with computation based
         on address of a different object.  This may cause problems in rtl
         level alias analysis (that does not expect this to be happening,
         as this is illegal in C), and would be unlikely to be useful
         anyway.  */
      if (use->iv->base_object
          && cand->iv->base_object
          && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
        return infinite_cost;
    }

  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses value of the variable before increment anyway,
     so it is not that much unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  if (!constant_multiple_of (ustep, cstep, &rat))
    return infinite_cost;

  if (wi::fits_shwi_p (rat))
    ratio = rat.to_shwi ();
  else
    return infinite_cost;

  STRIP_NOPS (cbase);
  ctype = TREE_TYPE (cbase);

  stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (also holds in the case ratio == -1, TODO.  */

  if (cst_and_fits_in_hwi (cbase))
    {
      offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
      cost = difference_cost (data,
                              ubase, build_int_cst (utype, 0),
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost /= avg_loop_niter (data->current_loop);
    }
  else if (ratio == 1)
    {
      tree real_cbase = cbase;

      /* Check to see if any adjustment is needed.  */
      if (cstepi == 0 && stmt_is_after_inc)
        {
          aff_tree real_cbase_aff;
          aff_tree cstep_aff;

          tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
                                   &real_cbase_aff);
          tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);

          aff_combination_add (&real_cbase_aff, &cstep_aff);
          real_cbase = aff_combination_to_tree (&real_cbase_aff);
        }

      cost = difference_cost (data,
                              ubase, real_cbase,
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost /= avg_loop_niter (data->current_loop);
    }
  else if (address_p
           && !POINTER_TYPE_P (ctype)
           && multiplier_allowed_in_address_p
                (ratio, mem_mode,
                 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
    {
      tree real_cbase = cbase;

      if (cstepi == 0 && stmt_is_after_inc)
        {
          if (POINTER_TYPE_P (ctype))
            real_cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
          else
            real_cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
        }
      real_cbase = fold_build2 (MULT_EXPR, ctype, real_cbase,
                                build_int_cst (ctype, ratio));
      cost = difference_cost (data,
                              ubase, real_cbase,
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost /= avg_loop_niter (data->current_loop);
    }
  else
    {
      cost = force_var_cost (data, cbase, depends_on);
      cost += difference_cost (data, ubase, build_int_cst (utype, 0),
                               &symbol_present, &var_present, &offset,
                               depends_on);
      cost /= avg_loop_niter (data->current_loop);
      cost += add_cost (data->speed, TYPE_MODE (ctype));
    }

  /* Record setup cost in scratch field.  */
  cost.scratch = cost.cost;

  if (inv_expr && depends_on && *depends_on)
    {
      *inv_expr = get_loop_invariant_expr (data, ubase, cbase, ratio,
                                           address_p);
      /* Clear depends on.  */
      if (*inv_expr != NULL)
        bitmap_clear (*depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     the value of the step.  */
  if (stmt_is_after_inc)
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var1/const parts may be omitted).  If we are looking for an
     address, find the cost of addressing this.  */
  if (address_p)
    {
      cost += get_address_cost (symbol_present, var_present,
                                offset, ratio, cstepi,
                                mem_mode,
                                TYPE_ADDR_SPACE (TREE_TYPE (utype)),
                                speed, stmt_is_after_inc, can_autoinc);
      return get_scaled_computation_cost_at (data, at, cand, cost);
    }

  /* Otherwise estimate the costs for computing the expression.  */
  if (!symbol_present && !var_present && !offset)
    {
      if (ratio != 1)
        cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
      return get_scaled_computation_cost_at (data, at, cand, cost);
    }

  /* Symbol + offset should be compile-time computable so consider that they
     are added once to the variable, if present.  */
  if (var_present && (symbol_present || offset))
    cost += adjust_setup_cost (data,
                               add_cost (speed, TYPE_MODE (ctype)));

  /* Having offset does not affect runtime cost in case it is added to
     symbol, but it increases complexity.  */
  if (offset)
    cost.complexity++;

  cost += add_cost (speed, TYPE_MODE (ctype));

  aratio = ratio > 0 ? ratio : -ratio;
  if (aratio != 1)
    cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);

  return get_scaled_computation_cost_at (data, at, cand, cost);

fallback:
  if (can_autoinc)
    *can_autoinc = false;

  /* Just get the expression, expand it and measure the cost.  */
  tree comp = get_computation_at (data->current_loop, use, cand, at);

  if (!comp)
    return infinite_cost;

  if (address_p)
    comp = build_simple_mem_ref (comp);

  cost = comp_cost (computation_cost (comp, speed), 0);

  return get_scaled_computation_cost_at (data, at, cand, cost);
}
/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
   autoinc addressing is likely.  */

static comp_cost
get_computation_cost (struct ivopts_data *data,
                      struct iv_use *use, struct iv_cand *cand,
                      bool address_p, bitmap *depends_on,
                      bool *can_autoinc, iv_inv_expr_ent **inv_expr)
{
  return get_computation_cost_at (data,
                                  use, cand, address_p, depends_on, use->stmt,
                                  can_autoinc, inv_expr);
}

/* Determines cost of computing the use in GROUP with CAND in a generic
   expression.  */

static bool
determine_group_iv_cost_generic (struct ivopts_data *data,
                                 struct iv_group *group, struct iv_cand *cand)
{
  comp_cost cost;
  iv_inv_expr_ent *inv_expr = NULL;
  bitmap depends_on = NULL;
  struct iv_use *use = group->vuses[0];

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
    cost = no_cost;
  else
    cost = get_computation_cost (data, use, cand, false,
                                 &depends_on, NULL, &inv_expr);

  set_group_iv_cost (data, group, cand, cost, depends_on,
                     NULL_TREE, ERROR_MARK, inv_expr);
  return !cost.infinite_cost_p ();
}

/* Determines cost of computing uses in GROUP with CAND in addresses.  */

static bool
determine_group_iv_cost_address (struct ivopts_data *data,
                                 struct iv_group *group, struct iv_cand *cand)
{
  unsigned i;
  bitmap depends_on;
  bool can_autoinc;
  iv_inv_expr_ent *inv_expr = NULL;
  struct iv_use *use = group->vuses[0];
  comp_cost sum_cost = no_cost, cost;

  cost = get_computation_cost (data, use, cand, true,
                               &depends_on, &can_autoinc, &inv_expr);

  sum_cost = cost;
  if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
    {
      if (can_autoinc)
        sum_cost -= cand->cost_step;
      /* If we generated the candidate solely for exploiting autoincrement
         opportunities, and it turns out it can't be used, set the cost to
         infinity to make sure we ignore it.  */
      else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
        sum_cost = infinite_cost;
    }

  /* Uses in a group can share setup code, so only add setup cost once.  */
  cost -= cost.scratch;
  /* Compute and add costs for rest uses of this group.  */
  for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
    {
      struct iv_use *next = group->vuses[i];

      /* TODO: We could skip computing cost for sub iv_use when it has the
         same cost as the first iv_use, but the cost really depends on the
         offset and where the iv_use is.  */
      cost = get_computation_cost (data, next, cand, true,
                                   NULL, &can_autoinc, NULL);
      sum_cost += cost;
    }
  set_group_iv_cost (data, group, cand, sum_cost, depends_on,
                     NULL_TREE, ERROR_MARK, inv_expr);

  return !sum_cost.infinite_cost_p ();
}
/* Computes value of candidate CAND at position AT in iteration NITER, and
   stores it to VAL.  */

static void
cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
               aff_tree *val)
{
  aff_tree step, delta, nit;
  struct iv *iv = cand->iv;
  tree type = TREE_TYPE (iv->base);
  tree steptype = type;
  if (POINTER_TYPE_P (type))
    steptype = sizetype;
  steptype = unsigned_type_for (type);

  tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
  aff_combination_convert (&step, steptype);
  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
  aff_combination_convert (&nit, steptype);
  aff_combination_mult (&nit, &step, &delta);
  if (stmt_after_increment (loop, cand, at))
    aff_combination_add (&delta, &step);

  tree_to_aff_combination (iv->base, type, val);
  if (!POINTER_TYPE_P (type))
    aff_combination_convert (val, steptype);
  aff_combination_add (val, &delta);
}
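/* In other words, VAL is the affine form of BASE + NITER * STEP (plus one
   more STEP when AT is after the increment); e.g. for base 16, step 4 and
   niter 10 the value computed before the increment is 16 + 10 * 4 == 56.  */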
/* Returns period of induction variable iv.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  type = unsigned_type_for (TREE_TYPE (step));
  /* Period of the iv is lcm (step, type_range)/step -1,
     i.e., N*type_range/step - 1. Since type range is power
     of two, N == (step >> num_of_ending_zeros_binary (step),
     so the final result is

       (type_range >> num_of_ending_zeros_binary (step)) - 1.  */
  pow2div = num_ending_zeros (step);

  period = build_low_bits_mask (type,
                                (TYPE_PRECISION (type)
                                 - tree_to_uhwi (pow2div)));

  return period;
}
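/* For instance, for a 32-bit iv with step 4 the step has two trailing zero
   bits, so the period is (1 << (32 - 2)) - 1 == 0x3fffffff: for that many
   increments the iv does not revisit a previously taken value.  */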
/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  ex_bb = gimple_bb (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}

/* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
   we only detect the situation that BASE = SOMETHING + OFFSET, where the
   calculation is performed in non-wrapping type.

   TODO: More generally, we could test for the situation that
         BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
         This would require knowing the sign of OFFSET.  */

static bool
difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
{
  enum tree_code code;
  tree e1, e2;
  aff_tree aff_e1, aff_e2, aff_offset;

  if (!nowrap_type_p (TREE_TYPE (base)))
    return false;

  base = expand_simple_operations (base);

  if (TREE_CODE (base) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (base);

      if (gimple_code (stmt) != GIMPLE_ASSIGN)
        return false;

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
        return false;

      e1 = gimple_assign_rhs1 (stmt);
      e2 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      code = TREE_CODE (base);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
        return false;
      e1 = TREE_OPERAND (base, 0);
      e2 = TREE_OPERAND (base, 1);
    }

  /* Use affine expansion as deeper inspection to prove the equality.  */
  tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
                                  &aff_e2, &data->name_expansion_cache);
  tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
                                  &aff_offset, &data->name_expansion_cache);
  aff_combination_scale (&aff_offset, -1);
  switch (code)
    {
    case PLUS_EXPR:
      aff_combination_add (&aff_e2, &aff_offset);
      if (aff_combination_zero_p (&aff_e2))
        return true;

      tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
                                      &aff_e1, &data->name_expansion_cache);
      aff_combination_add (&aff_e1, &aff_offset);
      return aff_combination_zero_p (&aff_e1);

    case POINTER_PLUS_EXPR:
      aff_combination_add (&aff_e2, &aff_offset);
      return aff_combination_zero_p (&aff_e2);

    default:
      return false;
    }
}
/* Tries to replace loop exit by one formulated in terms of a LT_EXPR
   comparison with CAND.  NITER describes the number of iterations of
   the loops.  If successful, the comparison in COMP_P is altered accordingly.

   We aim to handle the following situation:

   Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
   We aim to optimize this to

   while (p < p_0 - a + b);

   This preserves the correctness, since the pointer arithmetics does not
   overflow.  More precisely:

   1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is
      no overflow in computing it or the values of p.
   2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
      overflow.  To prove this, we use the fact that p_0 = base + a.  */
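/* An illustrative sketch of the situation handled here (a reconstructed
   example; the variable names are assumed): starting from

     p = p_0 = base + a;
     for (i = a; i < b; i++, p++)
       bla (*p);

   the exit test "i < b" is rewritten as "p < p_0 - a + b", which lets the
   integer iv I be eliminated in favour of the pointer iv P.  */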
static bool
iv_elimination_compare_lt (struct ivopts_data *data,
                           struct iv_cand *cand, enum tree_code *comp_p,
                           struct tree_niter_desc *niter)
{
  tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
  struct aff_tree nit, tmpa, tmpb;
  enum tree_code comp;
  HOST_WIDE_INT step;

  /* We need to know that the candidate induction variable does not overflow.
     While more complex analysis may be used to prove this, for now just
     check that the variable appears in the original program and that it
     is computed in a type that guarantees no overflows.  */
  cand_type = TREE_TYPE (cand->iv->base);
  if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
    return false;

  /* Make sure that the loop iterates till the loop bound is hit, as otherwise
     the calculation of the BOUND could overflow, making the comparison
     invalid.  */
  if (!data->loop_single_exit_p)
    return false;

  /* We need to be able to decide whether candidate is increasing or decreasing
     in order to choose the right comparison operator.  */
  if (!cst_and_fits_in_hwi (cand->iv->step))
    return false;
  step = int_cst_value (cand->iv->step);

  /* Check that the number of iterations matches the expected pattern:
     a + 1 > b ? 0 : b - a - 1.  */
  mbz = niter->may_be_zero;
  if (TREE_CODE (mbz) == GT_EXPR)
    {
      /* Handle a + 1 > b.  */
      tree op0 = TREE_OPERAND (mbz, 0);
      if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
        {
          a = TREE_OPERAND (op0, 0);
          b = TREE_OPERAND (mbz, 1);
        }
      else
        return false;
    }
  else if (TREE_CODE (mbz) == LT_EXPR)
    {
      tree op1 = TREE_OPERAND (mbz, 1);

      /* Handle b < a + 1.  */
      if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
        {
          a = TREE_OPERAND (op1, 0);
          b = TREE_OPERAND (mbz, 0);
        }
      else
        return false;
    }
  else
    return false;

  /* Expected number of iterations is B - A - 1.  Check that it matches
     the actual number, i.e., that B - A - NITER = 1.  */
  tree_to_aff_combination (niter->niter, nit_type, &nit);
  tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
  tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
  aff_combination_scale (&nit, -1);
  aff_combination_scale (&tmpa, -1);
  aff_combination_add (&tmpb, &tmpa);
  aff_combination_add (&tmpb, &nit);
  if (tmpb.n != 0 || tmpb.offset != 1)
    return false;

  /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
     overflow.  */
  offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
                        cand->iv->step,
                        fold_convert (TREE_TYPE (cand->iv->step), a));
  if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
    return false;

  /* Determine the new comparison operator.  */
  comp = step < 0 ? GT_EXPR : LT_EXPR;
  if (*comp_p == NE_EXPR)
    *comp_p = comp;
  else if (*comp_p == EQ_EXPR)
    *comp_p = invert_tree_comparison (comp, false);
  else
    gcc_unreachable ();

  return true;
}
/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND, and the
   comparison operator to COMP.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
                  struct iv_use *use, struct iv_cand *cand, tree *bound,
                  enum tree_code *comp)
{
  basic_block ex_bb;
  edge exit;
  tree period;
  struct loop *loop = data->current_loop;
  aff_tree bnd;
  struct tree_niter_desc *desc = NULL;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.
     TODO: extend to other conditions inside loop body.  */
  ex_bb = gimple_bb (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || gimple_code (use->stmt) != GIMPLE_COND
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  desc = niter_for_exit (data, exit);
  if (!desc)
    return false;

  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);

  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (desc->niter) == INTEGER_CST)
    {
      /* See cand_value_at.  */
      if (stmt_after_increment (loop, cand, use->stmt))
        {
          if (!tree_int_cst_lt (desc->niter, period))
            return false;
        }
      else
        {
          if (tree_int_cst_lt (period, desc->niter))
            return false;
        }
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else
    {
      widest_int period_value, max_niter;

      max_niter = desc->max;
      if (stmt_after_increment (loop, cand, use->stmt))
        max_niter += 1;
      period_value = wi::to_widest (period);
      if (wi::gtu_p (max_niter, period_value))
        {
          /* See if we can take advantage of inferred loop bound
             information.  */
          if (data->loop_single_exit_p)
            {
              if (!max_loop_iterations (loop, &max_niter))
                return false;
              /* The loop bound is already adjusted by adding 1.  */
              if (wi::gtu_p (max_niter, period_value))
                return false;
            }
          else
            return false;
        }
    }

  cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);

  *bound = fold_convert (TREE_TYPE (cand->iv->base),
                         aff_combination_to_tree (&bnd));
  *comp = iv_elimination_compare (data, use);

  /* It is unlikely that computing the number of iterations using division
     would be more profitable than keeping the original induction variable.  */
  if (expression_expensive_p (*bound))
    return false;

  /* Sometimes, it is possible to handle the situation that the number of
     iterations may be zero unless additional assumtions by using <
     instead of != in the exit condition.

     TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
           base the exit condition on it.  However, that is often too
           expensive.  */
  if (!integer_zerop (desc->may_be_zero))
    return iv_elimination_compare_lt (data, cand, comp, desc);

  return true;
}

/* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
   be copied, if it is used in the loop body and DATA->body_includes_call.  */

static int
parm_decl_cost (struct ivopts_data *data, tree bound)
{
  tree sbound = bound;
  STRIP_NOPS (sbound);

  if (TREE_CODE (sbound) == SSA_NAME
      && SSA_NAME_IS_DEFAULT_DEF (sbound)
      && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
      && data->body_includes_call)
    return COSTS_N_INSNS (1);

  return 0;
}
/* Determines cost of computing the use in GROUP with CAND in a condition.  */

static bool
determine_group_iv_cost_cond (struct ivopts_data *data,
                              struct iv_group *group, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  comp_cost elim_cost, express_cost, cost, bound_cost;
  bool ok;
  iv_inv_expr_ent *elim_inv_expr = NULL, *express_inv_expr = NULL, *inv_expr;
  tree *control_var, *bound_cst;
  enum tree_code comp = ERROR_MARK;
  struct iv_use *use = group->vuses[0];

  gcc_assert (cand->iv);

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound, &comp))
    {
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
      if (elim_cost.cost == 0)
        elim_cost.cost = parm_decl_cost (data, bound);
      else if (TREE_CODE (bound) == INTEGER_CST)
        elim_cost.cost = 0;
      /* If we replace a loop condition 'i < n' with 'p < base + n',
         depends_on_elim will have 'base' and 'n' set, which implies
         that both 'base' and 'n' will be live during the loop.  More likely,
         'base + n' will be loop invariant, resulting in only one live value
         during the loop.  So in that case we clear depends_on_elim and set
         elim_inv_expr_id instead.  */
      if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
        {
          elim_inv_expr = record_inv_expr (data, bound);
          bitmap_clear (depends_on_elim);
        }
      /* The bound is a loop invariant, so it will be only computed
         once.  */
      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
    }
  else
    elim_cost = infinite_cost;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
                              NULL, &cmp_iv);
  gcc_assert (ok);

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
          || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost -= 1;

  express_cost = get_computation_cost (data, use, cand, false,
                                       &depends_on_express, NULL,
                                       &express_inv_expr);
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Count the cost of the original bound as well.  */
  bound_cost = force_var_cost (data, *bound_cst, NULL);
  if (bound_cost.cost == 0)
    bound_cost.cost = parm_decl_cost (data, *bound_cst);
  else if (TREE_CODE (*bound_cst) == INTEGER_CST)
    bound_cost.cost = 0;
  express_cost += bound_cost;

  /* Choose the better approach, preferring the eliminated IV. */
  if (elim_cost <= express_cost)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      depends_on_elim = NULL;
      inv_expr = elim_inv_expr;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      bound = NULL_TREE;
      comp = ERROR_MARK;
      inv_expr = express_inv_expr;
    }

  set_group_iv_cost (data, group, cand, cost,
                     depends_on, bound, comp, inv_expr);

  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !cost.infinite_cost_p ();
}
/* Determines cost of computing uses in GROUP with CAND.  Returns false
   if USE cannot be represented with CAND.  */

static bool
determine_group_iv_cost (struct ivopts_data *data,
                         struct iv_group *group, struct iv_cand *cand)
{
  switch (group->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_group_iv_cost_generic (data, group, cand);

    case USE_ADDRESS:
      return determine_group_iv_cost_address (data, group, cand);

    case USE_COMPARE:
      return determine_group_iv_cost_cond (data, group, cand);

    default:
      gcc_unreachable ();
    }
}

/* Return true if get_computation_cost indicates that autoincrement is
   a possibility for the pair of USE and CAND, false otherwise.  */

static bool
autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
                           struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  comp_cost cost;

  if (use->type != USE_ADDRESS)
    return false;

  cost = get_computation_cost (data, use, cand, true, &depends_on,
                               &can_autoinc, NULL);

  BITMAP_FREE (depends_on);

  return !cost.infinite_cost_p () && can_autoinc;
}
/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
   use that allows autoincrement, and set their AINC_USE if possible.  */

static void
set_autoinc_for_original_candidates (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < data->vcands.length (); i++)
    {
      struct iv_cand *cand = data->vcands[i];
      struct iv_use *closest_before = NULL;
      struct iv_use *closest_after = NULL;
      if (cand->pos != IP_ORIGINAL)
        continue;

      for (j = 0; j < data->vgroups.length (); j++)
        {
          struct iv_group *group = data->vgroups[j];
          struct iv_use *use = group->vuses[0];
          unsigned uid = gimple_uid (use->stmt);

          if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
            continue;

          if (uid < gimple_uid (cand->incremented_at)
              && (closest_before == NULL
                  || uid > gimple_uid (closest_before->stmt)))
            closest_before = use;

          if (uid > gimple_uid (cand->incremented_at)
              && (closest_after == NULL
                  || uid < gimple_uid (closest_after->stmt)))
            closest_after = use;
        }

      if (closest_before != NULL
          && autoinc_possible_for_pair (data, closest_before, cand))
        cand->ainc_use = closest_before;
      else if (closest_after != NULL
               && autoinc_possible_for_pair (data, closest_after, cand))
        cand->ainc_use = closest_after;
    }
}
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_iv_candidate_for_bivs (data);

  /* Add induction variables derived from uses.  */
  add_iv_candidate_for_groups (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;

      fprintf (dump_file, "\n<Important Candidates>:\t");
      for (i = 0; i < data->vcands.length (); i++)
        if (data->vcands[i]->important)
          fprintf (dump_file, " %d,", data->vcands[i]->id);
      fprintf (dump_file, "\n");

      fprintf (dump_file, "\n<Group, Cand> Related:\n");
      for (i = 0; i < data->vgroups.length (); i++)
        {
          struct iv_group *group = data->vgroups[i];

          if (group->related_cands)
            {
              fprintf (dump_file, "  Group %d:\t", group->id);
              dump_bitmap (dump_file, group->related_cands);
            }
        }
      fprintf (dump_file, "\n");
    }
}

/* Determines costs of computing use of iv with an iv candidate.  */

static void
determine_group_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_cand *cand;
  struct iv_group *group;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];

      if (data->consider_all_candidates)
        {
          for (j = 0; j < data->vcands.length (); j++)
            {
              cand = data->vcands[j];
              determine_group_iv_cost (data, group, cand);
            }
        }
      else
        {
          bitmap_iterator bi;

          EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
            {
              cand = data->vcands[j];
              if (!determine_group_iv_cost (data, group, cand))
                bitmap_set_bit (to_clear, j);
            }

          /* Remove the candidates for that the cost is infinite from
             the list of related candidates.  */
          bitmap_and_compl_into (group->related_cands, to_clear);
          bitmap_clear (to_clear);
        }
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\n<Invariant Expressions>:\n");
      auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());

      for (hash_table<iv_inv_expr_hasher>::iterator it
           = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
           ++it)
        list.safe_push (*it);

      list.qsort (sort_iv_inv_expr_ent);

      for (i = 0; i < list.length (); ++i)
        {
          fprintf (dump_file, "inv_expr %d: \t", i);
          print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n<Group-candidate Costs>:\n");

      for (i = 0; i < data->vgroups.length (); i++)
        {
          group = data->vgroups[i];

          fprintf (dump_file, "Group %d:\n", i);
          fprintf (dump_file, "  cand\tcost\tcompl.\tinv.ex.\tdepends on\n");
          for (j = 0; j < group->n_map_members; j++)
            {
              if (!group->cost_map[j].cand
                  || group->cost_map[j].cost.infinite_cost_p ())
                continue;

              fprintf (dump_file, "  %d\t%d\t%d\t",
                       group->cost_map[j].cand->id,
                       group->cost_map[j].cost.cost,
                       group->cost_map[j].cost.complexity);
              if (group->cost_map[j].inv_expr != NULL)
                fprintf (dump_file, "%d\t",
                         group->cost_map[j].inv_expr->id);
              else
                fprintf (dump_file, "\t");
              if (group->cost_map[j].depends_on)
                bitmap_print (dump_file,
                              group->cost_map[j].depends_on, "","");
              fprintf (dump_file, "\n");
            }

          fprintf (dump_file, "\n");
        }
      fprintf (dump_file, "\n");
    }
}
/* Determines cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  comp_cost cost_base;
  unsigned cost, cost_step;
  tree base;

  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  /* It will be exceptional that the iv register happens to be initialized with
     the proper value at no cost.  In general, there will at least be a regcopy
     or a const set.  */
  if (cost_base.cost == 0)
    cost_base.cost = COSTS_N_INSNS (1);
  cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));

  cost = cost_step + adjust_setup_cost (data, cost_base.cost);

  /* Prefer the original ivs unless we may gain something by replacing it.
     The reason is to make debugging simpler; so this is not relevant for
     artificial ivs created by other optimization passes.  */
  if (cand->pos != IP_ORIGINAL
      || !SSA_NAME_VAR (cand->var_before)
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cost++;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
  cand->cost_step = cost_step;
}

/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "<Candidate Costs>:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < data->vcands.length (); i++)
    {
      struct iv_cand *cand = data->vcands[i];

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}

/* Calculates cost for having SIZE induction variables.  */

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  /* We add size to the cost, so that we prefer eliminating ivs
     if possible.  */
  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
                                            data->body_includes_call);
}
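/* In other words, an assignment with SIZE live induction variables pays SIZE
   plus whatever estimate_reg_pressure_cost charges for keeping that many
   values in registers alongside the DATA->regs_used other values, so a
   smaller candidate set always wins when everything else is equal.  */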
/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gphi *phi;
  gphi_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "<Global Costs>:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
    }

  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      op = PHI_RESULT (phi);

      if (virtual_operand_p (op))
        continue;

      if (get_iv (data, op))
        continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
        n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
        fprintf (dump_file, "  %d\t%d\n", j,
                 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}
/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  if (!a)
    return false;

  if (!b)
    return true;

  if (a->cost < b->cost)
    return true;

  if (b->cost < a->cost)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}

/* Returns candidate by that USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
{
  return ivs->cand_for_group[group->id];
}

/* Computes the cost field of IVS structure.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  comp_cost cost = ivs->cand_use_cost;

  cost += ivs->cand_cost;

  cost += ivopts_global_cost_for_size (data,
                                       ivs->n_regs
                                       + ivs->used_inv_exprs->elements ());

  ivs->cost = cost;
}
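/* The total cost of an assignment is therefore the sum of the per-group use
   costs, the per-candidate increment costs, and the register-pressure term
   for the registers plus distinct invariant expressions that have to stay
   live in the loop.  */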
/* Remove invariants in set INVS to set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
        ivs->n_regs--;
    }
}

/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
                 struct iv_group *group)
{
  unsigned gid = group->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_group[gid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_groups++;
  ivs->cand_for_group[gid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
        ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost -= cp->cost;

  iv_ca_set_remove_invariants (ivs, cp->depends_on);

  if (cp->inv_expr != NULL)
    {
      unsigned *slot = ivs->used_inv_exprs->get (cp->inv_expr);
      --(*slot);
      if (*slot == 0)
        ivs->used_inv_exprs->remove (cp->inv_expr);
    }
  iv_ca_recount_cost (data, ivs);
}

/* Add invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
        ivs->n_regs++;
    }
}

/* Set cost pair for GROUP in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_group *group, struct cost_pair *cp)
{
  unsigned gid = group->id, cid;

  if (ivs->cand_for_group[gid] == cp)
    return;

  if (ivs->cand_for_group[gid])
    iv_ca_set_no_cp (data, ivs, group);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_groups--;
      ivs->cand_for_group[gid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
        {
          bitmap_set_bit (ivs->cands, cid);
          /* Do not count the pseudocandidates.  */
          if (cp->cand->iv)
            ivs->n_regs++;
          ivs->n_cands++;
          ivs->cand_cost += cp->cand->cost;

          iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
        }

      ivs->cand_use_cost += cp->cost;
      iv_ca_set_add_invariants (ivs, cp->depends_on);

      if (cp->inv_expr != NULL)
        {
          unsigned *slot = &ivs->used_inv_exprs->get_or_insert (cp->inv_expr);
          ++(*slot);
        }
      iv_ca_recount_cost (data, ivs);
    }
}
/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  Consider all important candidates if candidates in
   set IVS don't give any result.  */

static void
iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
                 struct iv_group *group)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  unsigned i;
  struct iv_cand *cand;

  gcc_assert (ivs->upto >= group->id);
  ivs->upto++;
  ivs->bad_groups++;

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = data->vcands[i];
      cp = get_group_iv_cost (data, group, cand);
      if (cheaper_cost_pair (cp, best_cp))
        best_cp = cp;
    }

  if (best_cp == NULL)
    {
      EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
        {
          cand = data->vcands[i];
          cp = get_group_iv_cost (data, group, cand);
          if (cheaper_cost_pair (cp, best_cp))
            best_cp = cp;
        }
    }

  iv_ca_set_cp (data, ivs, group, best_cp);
}

/* Get cost for assignment IVS.  */

static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
  /* This was a conditional expression but it triggered a bug in
     Sun C 5.5.  */
  if (ivs->bad_groups)
    return infinite_cost;
  else
    return ivs->cost;
}
/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
        return false;
    }

  return true;
}

/* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
   it before NEXT.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
                 struct cost_pair *new_cp, struct iv_ca_delta *next)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->group = group;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next = next;

  return change;
}

/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next; last = last->next)
    continue;
  last->next = l2;

  return l1;
}

/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;

  for (act = delta; act; act = next)
    {
      next = act->next;
      act->next = prev;
      prev = act;

      std::swap (act->old_cp, act->new_cp);
    }

  return prev;
}

/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
                    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
      iv_ca_set_cp (data, ivs, act->group, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}
/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}

/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next;
      free (act);
    }

  *delta = NULL;
}

/* Allocates new iv candidates assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_groups = 0;
  nw->cand_for_group = XCNEWVEC (struct cost_pair *,
                                 data->vgroups.length ());
  nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = no_cost;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->used_inv_exprs = new hash_map <iv_inv_expr_ent *, unsigned> (13);
  nw->cost = no_cost;

  return nw;
}

/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_group);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  delete ((*ivs)->used_inv_exprs);
  free (*ivs);
  *ivs = NULL;
}
/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost,
           cost.complexity);
  fprintf (file, "  cand_cost: %d\n  cand_group_cost: %d (complexity %d)\n",
           ivs->cand_cost, ivs->cand_use_cost.cost,
           ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ","\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_group *group = data->vgroups[i];
      struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
      if (cp)
        fprintf (file, "   group:%d --> iv_cand:%d, cost=(%d,%d)\n",
                 group->id, cp->cand->id, cp->cost.cost,
                 cp->cost.complexity);
      else
        fprintf (file, "   group:%d --> ??\n", group->id);
    }

  const char *pref = "";
  fprintf (file, "  invariant variables: ");
  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
        fprintf (file, "%s%d", pref, i);
        pref = ", ";
      }

  pref = "";
  fprintf (file, "\n  invariant expressions: ");
  for (hash_map <iv_inv_expr_ent *, unsigned>::iterator it
       = ivs->used_inv_exprs->begin (); it != ivs->used_inv_exprs->end (); ++it)
    {
      fprintf (file, "%s%d", pref, (*it).first->id);
      pref = ", ";
    }

  fprintf (file, "\n\n");
}
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  MIN_NCAND is a flag.  When it is true
   the function will try to find a solution with mimimal iv candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta,
              unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_group *group;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      group = data->vgroups[i];
      old_cp = iv_ca_cand_for_group (ivs, group);

      if (old_cp
          && old_cp->cand == cand)
        continue;

      new_cp = get_group_iv_cost (data, group, cand);
      if (!new_cp)
        continue;

      if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
        continue;

      if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
        continue;

      *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  START is
   the candidate with which we start narrowing.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_cand *start,
              struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_group *group;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost, best_cost, acost;

  *delta = NULL;
  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];

      old_cp = iv_ca_cand_for_group (ivs, group);
      if (old_cp->cand != cand)
        continue;

      best_cost = iv_ca_cost (ivs);
      /* Start narrowing with START.  */
      new_cp = get_group_iv_cost (data, group, start);

      if (data->consider_all_candidates)
        {
          EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id || (start && ci == start->id))
                continue;

              cnd = data->vcands[ci];

              cp = get_group_iv_cost (data, group, cnd);
              if (!cp)
                continue;

              iv_ca_set_cp (data, ivs, group, cp);
              acost = iv_ca_cost (ivs);

              if (acost < best_cost)
                {
                  best_cost = acost;
                  new_cp = cp;
                }
            }
        }
      else
        {
          EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id || (start && ci == start->id))
                continue;

              cnd = data->vcands[ci];

              cp = get_group_iv_cost (data, group, cnd);
              if (!cp)
                continue;

              iv_ca_set_cp (data, ivs, group, cp);
              acost = iv_ca_cost (ivs);

              if (acost < best_cost)
                {
                  best_cost = acost;
                  new_cp = cp;
                }
            }
        }
      /* Restore to old cp for use.  */
      iv_ca_set_cp (data, ivs, group, old_cp);

      if (!new_cp)
        {
          iv_ca_delta_free (delta);
          return infinite_cost;
        }

      *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try optimizing the set of candidates IVS by removing candidates different
   from to EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
             struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = data->vcands[i];

      if (cand == except_cand)
        continue;

      acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);

      if (acost < best_cost)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
/* Check if CAND_IDX is a candidate other than OLD_CAND and has
   cheaper local cost for GROUP than BEST_CP.  Return pointer to
   the corresponding cost_pair, otherwise just return BEST_CP.  */

static struct cost_pair *
cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
                        unsigned int cand_idx, struct iv_cand *old_cand,
                        struct cost_pair *best_cp)
{
  struct iv_cand *cand;
  struct cost_pair *cp;

  gcc_assert (old_cand != NULL && best_cp != NULL);
  if (cand_idx == old_cand->id)
    return best_cp;

  cand = data->vcands[cand_idx];
  cp = get_group_iv_cost (data, group, cand);
  if (cp != NULL && cheaper_cost_pair (cp, best_cp))
    return cp;

  return best_cp;
}
/* Try breaking local optimal fixed-point for IVS by replacing candidates
   which are used by more than one iv uses.  For each of those candidates,
   this function tries to represent iv uses under that candidate using
   other ones with lower local cost, then tries to prune the new set.
   If the new set has lower cost, It returns the new cost after recording
   candidate replacement in list DELTA.  */

static comp_cost
iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
               struct iv_ca_delta **delta)
{
  bitmap_iterator bi, bj;
  unsigned int i, j, k;
  struct iv_cand *cand;
  comp_cost orig_cost, acost;
  struct iv_ca_delta *act_delta, *tmp_delta;
  struct cost_pair *old_cp, *best_cp = NULL;

  *delta = NULL;
  orig_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      if (ivs->n_cand_uses[i] == 1
          || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
        continue;

      cand = data->vcands[i];

      act_delta = NULL;
      /*  Represent uses under current candidate using other ones with
          lower local cost.  */
      for (j = 0; j < ivs->upto; j++)
        {
          struct iv_group *group = data->vgroups[j];
          old_cp = iv_ca_cand_for_group (ivs, group);

          if (old_cp->cand != cand)
            continue;

          best_cp = old_cp;
          if (data->consider_all_candidates)
            for (k = 0; k < data->vcands.length (); k++)
              best_cp = cheaper_cost_with_cand (data, group, k,
                                                old_cp->cand, best_cp);
          else
            EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
              best_cp = cheaper_cost_with_cand (data, group, k,
                                                old_cp->cand, best_cp);

          if (best_cp == old_cp)
            continue;

          act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
        }
      /* No need for further prune.  */
      if (!act_delta)
        continue;

      /* Prune the new candidate set.  */
      iv_ca_delta_commit (data, ivs, act_delta, true);
      acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
      iv_ca_delta_commit (data, ivs, act_delta, false);
      act_delta = iv_ca_delta_join (act_delta, tmp_delta);

      if (acost < orig_cost)
        {
          *delta = act_delta;
          return acost;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  return orig_cost;
}
6784 /* Tries to extend the sets IVS in the best possible way in order to
6785 express the GROUP. If ORIGINALP is true, prefer candidates from
6786 the original set of IVs, otherwise favor important candidates not
6787 based on any memory object. */
6790 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6791 struct iv_group
*group
, bool originalp
)
6793 comp_cost best_cost
, act_cost
;
6796 struct iv_cand
*cand
;
6797 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
6798 struct cost_pair
*cp
;
6800 iv_ca_add_group (data
, ivs
, group
);
6801 best_cost
= iv_ca_cost (ivs
);
6802 cp
= iv_ca_cand_for_group (ivs
, group
);
6805 best_delta
= iv_ca_delta_add (group
, NULL
, cp
, NULL
);
6806 iv_ca_set_no_cp (data
, ivs
, group
);
  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
    {
      cand = data->vcands[i];

      if (originalp && cand->pos != IP_ORIGINAL)
        continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
        continue;

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_group_iv_cost (data, group, cand);

      iv_ca_set_cp (data, ivs, group, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
      iv_ca_set_no_cp (data, ivs, group);
      act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);

      if (act_cost < best_cost)
        {
          best_cost = act_cost;

          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }
  if (best_cost.infinite_cost_p ())
    {
      for (i = 0; i < group->n_map_members; i++)
        {
          cp = group->cost_map + i;
          cand = cp->cand;

          /* Already tried this.  */
          if (cand->important)
            {
              if (originalp && cand->pos == IP_ORIGINAL)
                continue;
              if (!originalp && cand->iv->base_object == NULL_TREE)
                continue;
            }

          if (iv_ca_cand_used_p (ivs, cand))
            continue;

          iv_ca_set_cp (data, ivs, group, cp);
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
          iv_ca_set_no_cp (data, ivs, group);
          act_delta = iv_ca_delta_add (group,
                                       iv_ca_cand_for_group (ivs, group),
                                       cp, act_delta);

          if (act_cost < best_cost)
            {
              best_cost = act_cost;

              iv_ca_delta_free (&best_delta);
              best_delta = act_delta;
            }
          else
            iv_ca_delta_free (&act_delta);
        }
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !best_cost.infinite_cost_p ();
}
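/* Illustrative sketch (not part of GCC): try_add_cand_for above works in two
   rounds -- first it only considers the preferred candidates (original IVs
   or generic IVs not tied to a memory object), and only if none of them gives
   a finite cost does it fall back to every candidate recorded for the group.
   The toy function below mirrors that shape on plain arrays; the name
   toy_pick_candidate and the cost encoding (negative == infinite) are
   invented for the example.  */

static int
toy_pick_candidate (const int *cost, const bool *preferred, int n)
{
  int best = -1;

  /* Round 1: preferred candidates only.  */
  for (int i = 0; i < n; i++)
    if (preferred[i] && cost[i] >= 0 && (best < 0 || cost[i] < cost[best]))
      best = i;

  /* Round 2: if no preferred candidate works, consider them all.  */
  if (best < 0)
    for (int i = 0; i < n; i++)
      if (cost[i] >= 0 && (best < 0 || cost[i] < cost[best]))
        best = i;

  return best;  /* -1 plays the role of "cost is still infinite".  */
}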
/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  unsigned i;
  struct iv_ca *ivs = iv_ca_new (data);

  for (i = 0; i < data->vgroups.length (); i++)
    if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
      {
        iv_ca_free (&ivs);
        return NULL;
      }

  return ivs;
}
/* Tries to improve the set of induction variables IVS.  TRY_REPLACE_P
   points to a bool variable; if it is true, this function tries to break
   the local optimal fixed-point by replacing candidates in IVS.  */
static bool
try_improve_iv_set (struct ivopts_data *data,
                    struct iv_ca *ivs, bool *try_replace_p)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;
  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < data->vcands.length (); i++)
    {
      cand = data->vcands[i];

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      if (acost < best_cost)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }
  /* Try removing the candidates from the set instead.  */
  best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

  if (!best_delta && *try_replace_p)
    {
      *try_replace_p = false;
      /* So far the candidate-selecting algorithm tends to choose fewer IVs
         so that it can handle cases in which loops have many variables
         but the best choice is often to use only one general biv.  One
         weakness is that it cannot handle the opposite case, in which
         different candidates should be chosen with respect to each use.
         To solve the problem, we replace candidates in the manner described
         in the comment for iv_ca_replace, thus giving the general algorithm
         a chance to break the local optimal fixed-point in these cases.  */
      best_cost = iv_ca_replace (data, ivs, &best_delta);
    }
  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (best_cost == iv_ca_cost (ivs));
  iv_ca_delta_free (&best_delta);
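/* Illustrative sketch (not part of GCC): the driver loop built around
   try_improve_iv_set amounts to simple hill climbing -- keep applying the
   best single-step change (add, remove or replace one candidate) as long as
   the total cost strictly decreases.  The toy state and step function below
   are invented to show that structure; they are not the pass's real API.  */

struct toy_state
{
  int cost;   /* Current total cost of the candidate set.  */
  int floor;  /* Best cost reachable by single-step changes.  */
};

/* One search step: pretend some add/remove/replace saves one unit of cost;
   return false once no step improves anything.  */
static bool
toy_improve_once (struct toy_state *s)
{
  if (s->cost <= s->floor)
    return false;
  s->cost--;
  return true;
}

static void
toy_hill_climb (struct toy_state *s)
{
  /* Mirrors: while (try_improve_iv_set (data, set, &try_replace_p)) ;  */
  while (toy_improve_once (s))
    ;
}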
/* Attempts to find the optimal set of induction variables.  We do a simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */
static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;
  bool try_replace_p = true;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set, &try_replace_p))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  return set;
}
static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  comp_cost cost, origcost;
  struct iv_ca *set, *origset;
  /* Determine the cost based on a strategy that starts with original IVs,
     and try again using a strategy that prefers candidates not based
     on any IVs.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
               origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
               cost.cost, cost.complexity);
    }
  /* Choose the one with the best cost.  */
  if (origcost <= cost)
    {
      if (set)
        iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];
      group->selected = iv_ca_cand_for_group (set, group)->cand;
    }

  return set;
}
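/* Illustrative sketch (not part of GCC): find_optimal_iv_set above simply
   runs the search twice -- once preferring the original IVs, once preferring
   generic candidates -- and keeps whichever result is cheaper, treating a
   failed run as infinitely expensive.  A minimal stand-alone version of that
   "pick the cheaper of two attempts" choice, with an invented name and a
   negative cost standing in for "attempt failed":  */

static int
toy_cheaper_of_two (int cost_a, int cost_b)
{
  if (cost_a < 0)
    return cost_b;
  if (cost_b < 0)
    return cost_a;
  return cost_a <= cost_b ? cost_a : cost_b;
}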
/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  struct iv_use *use;
  struct iv_group *group;
  bool after = false;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      use = find_interesting_uses_op (data, cand->var_after);
      group = data->vgroups[use->group_id];
      group->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}
/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = data->vcands[i];
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Selected IV set for loop %d",
               data->current_loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
        fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
                 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
               avg_loop_niter (data->current_loop));
      fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_UNSIGNED " expressions",
               (unsigned HOST_WIDE_INT) set->used_inv_exprs->elements ());
      fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
        {
          cand = data->vcands[i];
          dump_cand (dump_file, cand);
        }
      fprintf (dump_file, "\n");
    }
}
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  gimple_stmt_iterator bsi;
  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
          || stmt_code == MINUS_EXPR
          || stmt_code == POINTER_PLUS_EXPR)
        {
          if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs2 (use->stmt);
          else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs1 (use->stmt);
        }

      if (op && expr_invariant_in_loop_p (data->current_loop, op))
        return;
    }
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }
  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
          /* We can't allow re-allocating the stmt as it might be pointed
             to still.  */
          && (get_gimple_rhs_num_ops (TREE_CODE (comp))
              >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
                                       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
        {
          duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
          /* As this isn't a plain copy we have to reset alignment
             information.  */
          if (SSA_NAME_PTR_INFO (comp))
            mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
        }
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}
/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref uses
   the iv value before the update, so the reordering transformation requires
   adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;

   directly propagating t over to (1) will introduce an overlapping live range
   and thus increase register pressure.  This peephole transforms it into:

   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;
*/
static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple *iv_update, *stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}
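/* Illustrative sketch (not part of GCC): at the source level the peephole
   above corresponds to performing the index update first and compensating
   the offset of the load, so a later pass can fuse the update and the access
   into an auto-increment style addressing mode.  Both toy loops below read
   exactly the same elements; the second matches the reordered shape.  The
   function names are invented for the example.  */

static long
toy_sum_before (const int *p, int n)
{
  long s = 0;
  for (int i = 0; i < n; i++)
    {
      s += p[i + 2];    /* t = MEM (base, iv1, offset); update follows.  */
    }
  return s;
}

static long
toy_sum_after (const int *p, int n)
{
  long s = 0;
  for (int i = 0; i < n; i++)
    {
      int j = i + 1;    /* iv2 = iv1 + 1 moved before the load.  */
      s += p[j + 1];    /* Same element: offset compensated (2 - 1 = 1).  */
    }
  return s;
}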
/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  bool ok;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
                        reference_alias_ptr_type (*use->op_p),
                        iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */
*data
,
7397 struct iv_use
*use
, struct iv_cand
*cand
)
7399 tree comp
, *var_p
, op
, bound
;
7400 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
7401 enum tree_code compare
;
7402 struct iv_group
*group
= data
->vgroups
[use
->group_id
];
7403 struct cost_pair
*cp
= get_group_iv_cost (data
, group
, cand
);
7409 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
7410 tree var_type
= TREE_TYPE (var
);
7413 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7415 fprintf (dump_file
, "Replacing exit test: ");
7416 print_gimple_stmt (dump_file
, use
->stmt
, 0, TDF_SLIM
);
7419 bound
= unshare_expr (fold_convert (var_type
, bound
));
7420 op
= force_gimple_operand (bound
, &stmts
, true, NULL_TREE
);
7422 gsi_insert_seq_on_edge_immediate (
7423 loop_preheader_edge (data
->current_loop
),
7426 gcond
*cond_stmt
= as_a
<gcond
*> (use
->stmt
);
7427 gimple_cond_set_lhs (cond_stmt
, var
);
7428 gimple_cond_set_code (cond_stmt
, compare
);
7429 gimple_cond_set_rhs (cond_stmt
, op
);
7433 /* The induction variable elimination failed; just express the original
7435 comp
= get_computation (data
->current_loop
, use
, cand
);
7436 gcc_assert (comp
!= NULL_TREE
);
7438 ok
= extract_cond_operands (data
, use
->stmt
, &var_p
, NULL
, NULL
, NULL
);
7441 *var_p
= force_gimple_operand_gsi (&bsi
, comp
, true, SSA_NAME_VAR (*var_p
),
7442 true, GSI_SAME_STMT
);
/* Rewrite the groups using the selected induction variables.  */

static void
rewrite_groups (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];
      struct iv_cand *cand = group->selected;

      if (group->type == USE_NONLINEAR_EXPR)
        {
          for (j = 0; j < group->vuses.length (); j++)
            {
              rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
              update_stmt (group->vuses[j]->stmt);
            }
        }
      else if (group->type == USE_ADDRESS)
        {
          for (j = 0; j < group->vuses.length (); j++)
            {
              rewrite_use_address (data, group->vuses[j], cand);
              update_stmt (group->vuses[j]->stmt);
            }
        }
      else
        {
          gcc_assert (group->type == USE_COMPARE);

          for (j = 0; j < group->vuses.length (); j++)
            {
              rewrite_use_compare (data, group->vuses[j], cand);
              update_stmt (group->vuses[j]->stmt);
            }
        }
    }
}
/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);
  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->iv->nonlin_use
          && !info->preserve_biv)
        {
          bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

          tree def = info->iv->ssa_name;

          if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
            {
              imm_use_iterator imm_iter;
              use_operand_p use_p;
              gimple *stmt;
              int count = 0;

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  /* We just want to determine whether to do nothing
                     (count == 0), to substitute the computed
                     expression into a single use of the SSA DEF by
                     itself (count == 1), or to use a debug temp
                     because the SSA DEF is used multiple times or as
                     part of a larger expression (count > 1).  */
                  count++;
                  if (gimple_debug_bind_get_value (stmt) != def)
                    count++;

                  if (count > 1)
                    BREAK_FROM_IMM_USE_STMT (imm_iter);
                }
              struct iv_use dummy_use;
              struct iv_cand *best_cand = NULL, *cand;
              unsigned i, best_pref = 0, cand_pref;

              memset (&dummy_use, 0, sizeof (dummy_use));
              dummy_use.iv = info->iv;
              for (i = 0; i < data->vgroups.length () && i < 64; i++)
                {
                  cand = data->vgroups[i]->selected;
                  if (cand == best_cand)
                    continue;
                  cand_pref = operand_equal_p (cand->iv->step,
                                               info->iv->step, 0)
                              ? 4 : 0;
                  cand_pref
                    += TYPE_MODE (TREE_TYPE (cand->iv->base))
                       == TYPE_MODE (TREE_TYPE (info->iv->base))
                       ? 2 : 0;
                  cand_pref
                    += TREE_CODE (cand->iv->base) == INTEGER_CST
                       ? 1 : 0;
                  if (best_cand == NULL || best_pref < cand_pref)
                    {
                      best_cand = cand;
                      best_pref = cand_pref;
                    }
                }
              tree comp = get_computation_at (data->current_loop,
                                              &dummy_use, best_cand,
                                              SSA_NAME_DEF_STMT (def));

              if (count > 1)
                {
                  tree vexpr = make_node (DEBUG_EXPR_DECL);
                  DECL_ARTIFICIAL (vexpr) = 1;
                  TREE_TYPE (vexpr) = TREE_TYPE (comp);
                  if (SSA_NAME_VAR (def))
                    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
                  else
                    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
                  gdebug *def_temp
                    = gimple_build_debug_bind (vexpr, comp, NULL);
                  gimple_stmt_iterator gsi;

                  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
                    gsi = gsi_after_labels (gimple_bb
                                            (SSA_NAME_DEF_STMT (def)));
                  else
                    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

                  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
                  comp = vexpr;
                }

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                    SET_USE (use_p, comp);

                  update_stmt (stmt);
                }
            }
        }
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
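/* Illustrative sketch (not part of GCC): when an unused IV is removed, the
   code above tries to keep its value visible to the debugger by rebinding it
   to an expression over a surviving candidate.  Conceptually, if the removed
   variable was i = i0 + i_step * n and the kept candidate is
   c = c0 + c_step * n, then i can be recovered as below.  The function and
   its parameter names are invented for the example.  */

static long
toy_recover_removed_iv (long c, long c0, long c_step, long i0, long i_step)
{
  long n = (c - c0) / c_step;   /* Iteration count from the kept IV.  */
  return i0 + i_step * n;       /* Value the removed IV would have had.  */
}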
/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for hash_map::traverse.  */

bool
free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
{
  free (value);
  return true;
}
/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  if (data->niters)
    {
      data->niters->traverse <void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      info->has_nonlin_use = false;
      info->preserve_biv = false;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);
  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];

      for (j = 0; j < group->vuses.length (); j++)
        free (group->vuses[j]);
      group->vuses.release ();

      BITMAP_FREE (group->related_cands);
      for (j = 0; j < group->n_map_members; j++)
        if (group->cost_map[j].depends_on)
          BITMAP_FREE (group->cost_map[j].depends_on);

      free (group->cost_map);
    }
  data->vgroups.truncate (0);

  for (i = 0; i < data->vcands.length (); i++)
    {
      struct iv_cand *cand = data->vcands[i];

      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
    }
  data->vcands.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
                                     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();
  data->max_inv_expr_id = 0;

  data->iv_common_cand_tab->empty ();
  data->iv_common_cands.truncate (0);
}
/* Finalizes data structures used by the iv optimization pass.  LOOPS is the
   loop tree.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->vgroups.release ();
  data->vcands.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
  delete data->iv_common_cand_tab;
  data->iv_common_cand_tab = NULL;
  data->iv_common_cands.release ();
  obstack_free (&data->iv_obstack, NULL);
}
/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple *stmt = gsi_stmt (gsi);
        if (is_gimple_call (stmt)
            && !gimple_call_internal_p (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }
  return false;
}
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->loop_loc = find_loop_location (loop);
  data->speed = optimize_loop_for_speed_p (loop);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d", loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
        fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
                 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, "\n");

      if (exit)
        {
          fprintf (dump_file, " single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }
  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name, determine whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_group_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_groups (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}
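/* Illustrative sketch (not part of GCC): the kind of rewrite this pass
   performs, shown at the C source level.  The first loop keeps two related
   induction variables (the counter i and the implied address &a[i]); after
   strength reduction and induction variable elimination, the second loop
   uses a single pointer IV and compares it against a precomputed bound.
   The function names are invented for the example.  */

static void
toy_loop_before (int *a, int n)
{
  for (int i = 0; i < n; i++)
    a[i] = 0;                   /* Address recomputed as a + i each time.  */
}

static void
toy_loop_after (int *a, int n)
{
  int *p = a;
  int *end = a + (n > 0 ? n : 0);   /* Exit test rewritten against the IV.  */
  for (; p < end; p++)
    *p = 0;
}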