1 /* Induction variable optimizations.
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 Note the interesting uses are categorized and handled in group.
34 Generally, address type uses are grouped together if their iv bases
35 are different in constant offset.
37 2) Candidates for the induction variables are found. This includes
39 -- old induction variables
40 -- the variables defined by expressions derived from the "interesting
43 3) The optimal (w.r. to a cost function) set of variables is chosen. The
44 cost function assigns a cost to sets of induction variables and consists
47 -- The group/use costs. Each of the interesting groups/uses chooses
48 the best induction variable in the set and adds its cost to the sum.
49 The cost reflects the time spent on modifying the induction variables
50 value to be usable for the given purpose (adding base and offset for
52 -- The variable costs. Each of the variables has a cost assigned that
53 reflects the costs associated with incrementing the value of the
54 variable. The original variables are somewhat preferred.
55 -- The set cost. Depending on the size of the set, extra cost may be
56 added to reflect register pressure.
58 All the costs are defined in a machine-specific way, using the target
59 hooks and machine descriptions to determine them.
61 4) The trees are transformed to use the new variables, the dead code is
64 All of this is done loop by loop. Doing it globally is theoretically
65 possible, it might give a better performance and it might enable us
66 to decide costs more precisely, but getting all the interactions right
67 would be complicated. */
71 #include "coretypes.h"
77 #include "tree-pass.h"
82 #include "insn-config.h"
86 #include "gimple-pretty-print.h"
88 #include "fold-const.h"
89 #include "stor-layout.h"
92 #include "gimple-iterator.h"
93 #include "gimplify-me.h"
95 #include "tree-ssa-loop-ivopts.h"
96 #include "tree-ssa-loop-manip.h"
97 #include "tree-ssa-loop-niter.h"
98 #include "tree-ssa-loop.h"
101 #include "tree-dfa.h"
102 #include "tree-ssa.h"
104 #include "tree-scalar-evolution.h"
106 #include "tree-affine.h"
107 #include "tree-ssa-propagate.h"
108 #include "tree-ssa-address.h"
109 #include "builtins.h"
110 #include "tree-vectorizer.h"
112 /* FIXME: Expressions are expanded to RTL in this pass to determine the
113 cost of different addressing modes. This should be moved to a TBD
114 interface between the GIMPLE and RTL worlds. */
116 /* The infinite cost. */
117 #define INFTY 10000000
119 /* Returns the expected number of loop iterations for LOOP.
120 The average trip count is computed from profile data if it
123 static inline HOST_WIDE_INT
124 avg_loop_niter (struct loop
*loop
)
126 HOST_WIDE_INT niter
= estimated_stmt_executions_int (loop
);
129 niter
= likely_max_stmt_executions_int (loop
);
131 if (niter
== -1 || niter
> PARAM_VALUE (PARAM_AVG_LOOP_NITER
))
132 return PARAM_VALUE (PARAM_AVG_LOOP_NITER
);
140 /* Representation of the induction variable. */
143 tree base
; /* Initial value of the iv. */
144 tree base_object
; /* A memory object to that the induction variable points. */
145 tree step
; /* Step of the iv (constant only). */
146 tree ssa_name
; /* The ssa name with the value. */
147 struct iv_use
*nonlin_use
; /* The identifier in the use if it is the case. */
148 bool biv_p
; /* Is it a biv? */
149 bool no_overflow
; /* True if the iv doesn't overflow. */
150 bool have_address_use
;/* For biv, indicate if it's used in any address
154 /* Per-ssa version information (induction variable descriptions, etc.). */
157 tree name
; /* The ssa name. */
158 struct iv
*iv
; /* Induction variable description. */
159 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
160 an expression that is not an induction variable. */
161 bool preserve_biv
; /* For the original biv, whether to preserve it. */
162 unsigned inv_id
; /* Id of an invariant. */
168 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
169 USE_REF_ADDRESS
, /* Use is an address for an explicit memory
171 USE_PTR_ADDRESS
, /* Use is a pointer argument to a function in
172 cases where the expansion of the function
173 will turn the argument into a normal address. */
174 USE_COMPARE
/* Use is a compare. */
177 /* Cost of a computation. */
180 comp_cost (): cost (0), complexity (0), scratch (0)
183 comp_cost (int cost
, unsigned complexity
, int scratch
= 0)
184 : cost (cost
), complexity (complexity
), scratch (scratch
)
187 /* Returns true if COST is infinite. */
188 bool infinite_cost_p ();
190 /* Adds costs COST1 and COST2. */
191 friend comp_cost
operator+ (comp_cost cost1
, comp_cost cost2
);
193 /* Adds COST to the comp_cost. */
194 comp_cost
operator+= (comp_cost cost
);
196 /* Adds constant C to this comp_cost. */
197 comp_cost
operator+= (HOST_WIDE_INT c
);
199 /* Subtracts constant C to this comp_cost. */
200 comp_cost
operator-= (HOST_WIDE_INT c
);
202 /* Divide the comp_cost by constant C. */
203 comp_cost
operator/= (HOST_WIDE_INT c
);
205 /* Multiply the comp_cost by constant C. */
206 comp_cost
operator*= (HOST_WIDE_INT c
);
208 /* Subtracts costs COST1 and COST2. */
209 friend comp_cost
operator- (comp_cost cost1
, comp_cost cost2
);
211 /* Subtracts COST from this comp_cost. */
212 comp_cost
operator-= (comp_cost cost
);
214 /* Returns true if COST1 is smaller than COST2. */
215 friend bool operator< (comp_cost cost1
, comp_cost cost2
);
217 /* Returns true if COST1 and COST2 are equal. */
218 friend bool operator== (comp_cost cost1
, comp_cost cost2
);
220 /* Returns true if COST1 is smaller or equal than COST2. */
221 friend bool operator<= (comp_cost cost1
, comp_cost cost2
);
223 int cost
; /* The runtime cost. */
224 unsigned complexity
; /* The estimate of the complexity of the code for
225 the computation (in no concrete units --
226 complexity field should be larger for more
227 complex expressions and addressing modes). */
228 int scratch
; /* Scratch used during cost computation. */
231 static const comp_cost no_cost
;
232 static const comp_cost
infinite_cost (INFTY
, INFTY
, INFTY
);
235 comp_cost::infinite_cost_p ()
237 return cost
== INFTY
;
241 operator+ (comp_cost cost1
, comp_cost cost2
)
243 if (cost1
.infinite_cost_p () || cost2
.infinite_cost_p ())
244 return infinite_cost
;
246 cost1
.cost
+= cost2
.cost
;
247 cost1
.complexity
+= cost2
.complexity
;
253 operator- (comp_cost cost1
, comp_cost cost2
)
255 if (cost1
.infinite_cost_p ())
256 return infinite_cost
;
258 gcc_assert (!cost2
.infinite_cost_p ());
260 cost1
.cost
-= cost2
.cost
;
261 cost1
.complexity
-= cost2
.complexity
;
267 comp_cost::operator+= (comp_cost cost
)
269 *this = *this + cost
;
274 comp_cost::operator+= (HOST_WIDE_INT c
)
276 if (infinite_cost_p ())
285 comp_cost::operator-= (HOST_WIDE_INT c
)
287 if (infinite_cost_p ())
296 comp_cost::operator/= (HOST_WIDE_INT c
)
298 if (infinite_cost_p ())
307 comp_cost::operator*= (HOST_WIDE_INT c
)
309 if (infinite_cost_p ())
318 comp_cost::operator-= (comp_cost cost
)
320 *this = *this - cost
;
325 operator< (comp_cost cost1
, comp_cost cost2
)
327 if (cost1
.cost
== cost2
.cost
)
328 return cost1
.complexity
< cost2
.complexity
;
330 return cost1
.cost
< cost2
.cost
;
334 operator== (comp_cost cost1
, comp_cost cost2
)
336 return cost1
.cost
== cost2
.cost
337 && cost1
.complexity
== cost2
.complexity
;
341 operator<= (comp_cost cost1
, comp_cost cost2
)
343 return cost1
< cost2
|| cost1
== cost2
;
346 struct iv_inv_expr_ent
;
348 /* The candidate - cost pair. */
351 struct iv_cand
*cand
; /* The candidate. */
352 comp_cost cost
; /* The cost. */
353 enum tree_code comp
; /* For iv elimination, the comparison. */
354 bitmap inv_vars
; /* The list of invariant ssa_vars that have to be
355 preserved when representing iv_use with iv_cand. */
356 bitmap inv_exprs
; /* The list of newly created invariant expressions
357 when representing iv_use with iv_cand. */
358 tree value
; /* For final value elimination, the expression for
359 the final value of the iv. For iv elimination,
360 the new bound to compare with. */
366 unsigned id
; /* The id of the use. */
367 unsigned group_id
; /* The group id the use belongs to. */
368 enum use_type type
; /* Type of the use. */
369 tree mem_type
; /* The memory type to use when testing whether an
370 address is legitimate, and what the address's
372 struct iv
*iv
; /* The induction variable it is based on. */
373 gimple
*stmt
; /* Statement in that it occurs. */
374 tree
*op_p
; /* The place where it occurs. */
376 tree addr_base
; /* Base address with const offset stripped. */
377 poly_uint64_pod addr_offset
;
378 /* Const offset stripped from base address. */
384 /* The id of the group. */
386 /* Uses of the group are of the same type. */
388 /* The set of "related" IV candidates, plus the important ones. */
389 bitmap related_cands
;
390 /* Number of IV candidates in the cost_map. */
391 unsigned n_map_members
;
392 /* The costs wrto the iv candidates. */
393 struct cost_pair
*cost_map
;
394 /* The selected candidate for the group. */
395 struct iv_cand
*selected
;
396 /* Uses in the group. */
397 vec
<struct iv_use
*> vuses
;
400 /* The position where the iv is computed. */
403 IP_NORMAL
, /* At the end, just before the exit condition. */
404 IP_END
, /* At the end of the latch block. */
405 IP_BEFORE_USE
, /* Immediately before a specific use. */
406 IP_AFTER_USE
, /* Immediately after a specific use. */
407 IP_ORIGINAL
/* The original biv. */
410 /* The induction variable candidate. */
413 unsigned id
; /* The number of the candidate. */
414 bool important
; /* Whether this is an "important" candidate, i.e. such
415 that it should be considered by all uses. */
416 ENUM_BITFIELD(iv_position
) pos
: 8; /* Where it is computed. */
417 gimple
*incremented_at
;/* For original biv, the statement where it is
419 tree var_before
; /* The variable used for it before increment. */
420 tree var_after
; /* The variable used for it after increment. */
421 struct iv
*iv
; /* The value of the candidate. NULL for
422 "pseudocandidate" used to indicate the possibility
423 to replace the final value of an iv by direct
424 computation of the value. */
425 unsigned cost
; /* Cost of the candidate. */
426 unsigned cost_step
; /* Cost of the candidate's increment operation. */
427 struct iv_use
*ainc_use
; /* For IP_{BEFORE,AFTER}_USE candidates, the place
428 where it is incremented. */
429 bitmap inv_vars
; /* The list of invariant ssa_vars used in step of the
431 bitmap inv_exprs
; /* If step is more complicated than a single ssa_var,
432 hanlde it as a new invariant expression which will
433 be hoisted out of loop. */
434 struct iv
*orig_iv
; /* The original iv if this cand is added from biv with
438 /* Hashtable entry for common candidate derived from iv uses. */
439 struct iv_common_cand
443 /* IV uses from which this common candidate is derived. */
444 auto_vec
<struct iv_use
*> uses
;
448 /* Hashtable helpers. */
450 struct iv_common_cand_hasher
: delete_ptr_hash
<iv_common_cand
>
452 static inline hashval_t
hash (const iv_common_cand
*);
453 static inline bool equal (const iv_common_cand
*, const iv_common_cand
*);
456 /* Hash function for possible common candidates. */
459 iv_common_cand_hasher::hash (const iv_common_cand
*ccand
)
464 /* Hash table equality function for common candidates. */
467 iv_common_cand_hasher::equal (const iv_common_cand
*ccand1
,
468 const iv_common_cand
*ccand2
)
470 return (ccand1
->hash
== ccand2
->hash
471 && operand_equal_p (ccand1
->base
, ccand2
->base
, 0)
472 && operand_equal_p (ccand1
->step
, ccand2
->step
, 0)
473 && (TYPE_PRECISION (TREE_TYPE (ccand1
->base
))
474 == TYPE_PRECISION (TREE_TYPE (ccand2
->base
))));
477 /* Loop invariant expression hashtable entry. */
479 struct iv_inv_expr_ent
481 /* Tree expression of the entry. */
483 /* Unique indentifier. */
489 /* Sort iv_inv_expr_ent pair A and B by id field. */
492 sort_iv_inv_expr_ent (const void *a
, const void *b
)
494 const iv_inv_expr_ent
* const *e1
= (const iv_inv_expr_ent
* const *) (a
);
495 const iv_inv_expr_ent
* const *e2
= (const iv_inv_expr_ent
* const *) (b
);
497 unsigned id1
= (*e1
)->id
;
498 unsigned id2
= (*e2
)->id
;
508 /* Hashtable helpers. */
510 struct iv_inv_expr_hasher
: free_ptr_hash
<iv_inv_expr_ent
>
512 static inline hashval_t
hash (const iv_inv_expr_ent
*);
513 static inline bool equal (const iv_inv_expr_ent
*, const iv_inv_expr_ent
*);
516 /* Return true if uses of type TYPE represent some form of address. */
519 address_p (use_type type
)
521 return type
== USE_REF_ADDRESS
|| type
== USE_PTR_ADDRESS
;
524 /* Hash function for loop invariant expressions. */
527 iv_inv_expr_hasher::hash (const iv_inv_expr_ent
*expr
)
532 /* Hash table equality function for expressions. */
535 iv_inv_expr_hasher::equal (const iv_inv_expr_ent
*expr1
,
536 const iv_inv_expr_ent
*expr2
)
538 return expr1
->hash
== expr2
->hash
539 && operand_equal_p (expr1
->expr
, expr2
->expr
, 0);
544 /* The currently optimized loop. */
545 struct loop
*current_loop
;
548 /* Numbers of iterations for all exits of the current loop. */
549 hash_map
<edge
, tree_niter_desc
*> *niters
;
551 /* Number of registers used in it. */
554 /* The size of version_info array allocated. */
555 unsigned version_info_size
;
557 /* The array of information for the ssa names. */
558 struct version_info
*version_info
;
560 /* The hashtable of loop invariant expressions created
562 hash_table
<iv_inv_expr_hasher
> *inv_expr_tab
;
564 /* The bitmap of indices in version_info whose value was changed. */
567 /* The uses of induction variables. */
568 vec
<iv_group
*> vgroups
;
570 /* The candidates. */
571 vec
<iv_cand
*> vcands
;
573 /* A bitmap of important candidates. */
574 bitmap important_candidates
;
576 /* Cache used by tree_to_aff_combination_expand. */
577 hash_map
<tree
, name_expansion
*> *name_expansion_cache
;
579 /* The hashtable of common candidates derived from iv uses. */
580 hash_table
<iv_common_cand_hasher
> *iv_common_cand_tab
;
582 /* The common candidates. */
583 vec
<iv_common_cand
*> iv_common_cands
;
585 /* The maximum invariant variable id. */
586 unsigned max_inv_var_id
;
588 /* The maximum invariant expression id. */
589 unsigned max_inv_expr_id
;
591 /* Number of no_overflow BIVs which are not used in memory address. */
592 unsigned bivs_not_used_in_addr
;
594 /* Obstack for iv structure. */
595 struct obstack iv_obstack
;
597 /* Whether to consider just related and important candidates when replacing a
599 bool consider_all_candidates
;
601 /* Are we optimizing for speed? */
604 /* Whether the loop body includes any function calls. */
605 bool body_includes_call
;
607 /* Whether the loop body can only be exited via single exit. */
608 bool loop_single_exit_p
;
611 /* An assignment of iv candidates to uses. */
615 /* The number of uses covered by the assignment. */
618 /* Number of uses that cannot be expressed by the candidates in the set. */
621 /* Candidate assigned to a use, together with the related costs. */
622 struct cost_pair
**cand_for_group
;
624 /* Number of times each candidate is used. */
625 unsigned *n_cand_uses
;
627 /* The candidates used. */
630 /* The number of candidates in the set. */
633 /* The number of invariants needed, including both invariant variants and
634 invariant expressions. */
637 /* Total cost of expressing uses. */
638 comp_cost cand_use_cost
;
640 /* Total cost of candidates. */
643 /* Number of times each invariant variable is used. */
644 unsigned *n_inv_var_uses
;
646 /* Number of times each invariant expression is used. */
647 unsigned *n_inv_expr_uses
;
649 /* Total cost of the assignment. */
653 /* Difference of two iv candidate assignments. */
658 struct iv_group
*group
;
660 /* An old assignment (for rollback purposes). */
661 struct cost_pair
*old_cp
;
663 /* A new assignment. */
664 struct cost_pair
*new_cp
;
666 /* Next change in the list. */
667 struct iv_ca_delta
*next
;
670 /* Bound on number of candidates below that all candidates are considered. */
672 #define CONSIDER_ALL_CANDIDATES_BOUND \
673 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
675 /* If there are more iv occurrences, we just give up (it is quite unlikely that
676 optimizing such a loop would help, and it would take ages). */
678 #define MAX_CONSIDERED_GROUPS \
679 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
681 /* If there are at most this number of ivs in the set, try removing unnecessary
682 ivs from the set always. */
684 #define ALWAYS_PRUNE_CAND_SET_BOUND \
685 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
687 /* The list of trees for that the decl_rtl field must be reset is stored
690 static vec
<tree
> decl_rtl_to_reset
;
692 static comp_cost
force_expr_to_var_cost (tree
, bool);
694 /* The single loop exit if it dominates the latch, NULL otherwise. */
697 single_dom_exit (struct loop
*loop
)
699 edge exit
= single_exit (loop
);
704 if (!just_once_each_iteration_p (loop
, exit
->src
))
710 /* Dumps information about the induction variable IV to FILE. Don't dump
711 variable's name if DUMP_NAME is FALSE. The information is dumped with
712 preceding spaces indicated by INDENT_LEVEL. */
715 dump_iv (FILE *file
, struct iv
*iv
, bool dump_name
, unsigned indent_level
)
718 const char spaces
[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
720 if (indent_level
> 4)
722 p
= spaces
+ 8 - (indent_level
<< 1);
724 fprintf (file
, "%sIV struct:\n", p
);
725 if (iv
->ssa_name
&& dump_name
)
727 fprintf (file
, "%s SSA_NAME:\t", p
);
728 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
729 fprintf (file
, "\n");
732 fprintf (file
, "%s Type:\t", p
);
733 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
734 fprintf (file
, "\n");
736 fprintf (file
, "%s Base:\t", p
);
737 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
738 fprintf (file
, "\n");
740 fprintf (file
, "%s Step:\t", p
);
741 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
742 fprintf (file
, "\n");
746 fprintf (file
, "%s Object:\t", p
);
747 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
748 fprintf (file
, "\n");
751 fprintf (file
, "%s Biv:\t%c\n", p
, iv
->biv_p
? 'Y' : 'N');
753 fprintf (file
, "%s Overflowness wrto loop niter:\t%s\n",
754 p
, iv
->no_overflow
? "No-overflow" : "Overflow");
757 /* Dumps information about the USE to FILE. */
760 dump_use (FILE *file
, struct iv_use
*use
)
762 fprintf (file
, " Use %d.%d:\n", use
->group_id
, use
->id
);
763 fprintf (file
, " At stmt:\t");
764 print_gimple_stmt (file
, use
->stmt
, 0);
765 fprintf (file
, " At pos:\t");
767 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
768 fprintf (file
, "\n");
769 dump_iv (file
, use
->iv
, false, 2);
772 /* Dumps information about the uses to FILE. */
775 dump_groups (FILE *file
, struct ivopts_data
*data
)
778 struct iv_group
*group
;
780 for (i
= 0; i
< data
->vgroups
.length (); i
++)
782 group
= data
->vgroups
[i
];
783 fprintf (file
, "Group %d:\n", group
->id
);
784 if (group
->type
== USE_NONLINEAR_EXPR
)
785 fprintf (file
, " Type:\tGENERIC\n");
786 else if (group
->type
== USE_REF_ADDRESS
)
787 fprintf (file
, " Type:\tREFERENCE ADDRESS\n");
788 else if (group
->type
== USE_PTR_ADDRESS
)
789 fprintf (file
, " Type:\tPOINTER ARGUMENT ADDRESS\n");
792 gcc_assert (group
->type
== USE_COMPARE
);
793 fprintf (file
, " Type:\tCOMPARE\n");
795 for (j
= 0; j
< group
->vuses
.length (); j
++)
796 dump_use (file
, group
->vuses
[j
]);
800 /* Dumps information about induction variable candidate CAND to FILE. */
803 dump_cand (FILE *file
, struct iv_cand
*cand
)
805 struct iv
*iv
= cand
->iv
;
807 fprintf (file
, "Candidate %d:\n", cand
->id
);
810 fprintf (file
, " Depend on inv.vars: ");
811 dump_bitmap (file
, cand
->inv_vars
);
815 fprintf (file
, " Depend on inv.exprs: ");
816 dump_bitmap (file
, cand
->inv_exprs
);
819 if (cand
->var_before
)
821 fprintf (file
, " Var befor: ");
822 print_generic_expr (file
, cand
->var_before
, TDF_SLIM
);
823 fprintf (file
, "\n");
827 fprintf (file
, " Var after: ");
828 print_generic_expr (file
, cand
->var_after
, TDF_SLIM
);
829 fprintf (file
, "\n");
835 fprintf (file
, " Incr POS: before exit test\n");
839 fprintf (file
, " Incr POS: before use %d\n", cand
->ainc_use
->id
);
843 fprintf (file
, " Incr POS: after use %d\n", cand
->ainc_use
->id
);
847 fprintf (file
, " Incr POS: at end\n");
851 fprintf (file
, " Incr POS: orig biv\n");
855 dump_iv (file
, iv
, false, 1);
858 /* Returns the info for ssa version VER. */
860 static inline struct version_info
*
861 ver_info (struct ivopts_data
*data
, unsigned ver
)
863 return data
->version_info
+ ver
;
866 /* Returns the info for ssa name NAME. */
868 static inline struct version_info
*
869 name_info (struct ivopts_data
*data
, tree name
)
871 return ver_info (data
, SSA_NAME_VERSION (name
));
874 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
878 stmt_after_ip_normal_pos (struct loop
*loop
, gimple
*stmt
)
880 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
884 if (sbb
== loop
->latch
)
890 return stmt
== last_stmt (bb
);
893 /* Returns true if STMT if after the place where the original induction
894 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
895 if the positions are identical. */
898 stmt_after_inc_pos (struct iv_cand
*cand
, gimple
*stmt
, bool true_if_equal
)
900 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
901 basic_block stmt_bb
= gimple_bb (stmt
);
903 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
906 if (stmt_bb
!= cand_bb
)
910 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
912 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
915 /* Returns true if STMT if after the place where the induction variable
916 CAND is incremented in LOOP. */
919 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple
*stmt
)
927 return stmt_after_ip_normal_pos (loop
, stmt
);
931 return stmt_after_inc_pos (cand
, stmt
, false);
934 return stmt_after_inc_pos (cand
, stmt
, true);
941 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
944 abnormal_ssa_name_p (tree exp
)
949 if (TREE_CODE (exp
) != SSA_NAME
)
952 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
955 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
956 abnormal phi node. Callback for for_each_index. */
959 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
960 void *data ATTRIBUTE_UNUSED
)
962 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
964 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
966 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
970 return !abnormal_ssa_name_p (*index
);
973 /* Returns true if EXPR contains a ssa name that occurs in an
974 abnormal phi node. */
977 contains_abnormal_ssa_name_p (tree expr
)
980 enum tree_code_class codeclass
;
985 code
= TREE_CODE (expr
);
986 codeclass
= TREE_CODE_CLASS (code
);
988 if (code
== CALL_EXPR
)
991 call_expr_arg_iterator iter
;
992 FOR_EACH_CALL_EXPR_ARG (arg
, iter
, expr
)
993 if (contains_abnormal_ssa_name_p (arg
))
998 if (code
== SSA_NAME
)
999 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
1001 if (code
== INTEGER_CST
1002 || is_gimple_min_invariant (expr
))
1005 if (code
== ADDR_EXPR
)
1006 return !for_each_index (&TREE_OPERAND (expr
, 0),
1007 idx_contains_abnormal_ssa_name_p
,
1010 if (code
== COND_EXPR
)
1011 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0))
1012 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1))
1013 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 2));
1018 case tcc_comparison
:
1019 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
1024 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
1036 /* Returns the structure describing number of iterations determined from
1037 EXIT of DATA->current_loop, or NULL if something goes wrong. */
1039 static struct tree_niter_desc
*
1040 niter_for_exit (struct ivopts_data
*data
, edge exit
)
1042 struct tree_niter_desc
*desc
;
1043 tree_niter_desc
**slot
;
1047 data
->niters
= new hash_map
<edge
, tree_niter_desc
*>;
1051 slot
= data
->niters
->get (exit
);
1055 /* Try to determine number of iterations. We cannot safely work with ssa
1056 names that appear in phi nodes on abnormal edges, so that we do not
1057 create overlapping life ranges for them (PR 27283). */
1058 desc
= XNEW (struct tree_niter_desc
);
1059 if (!number_of_iterations_exit (data
->current_loop
,
1061 || contains_abnormal_ssa_name_p (desc
->niter
))
1066 data
->niters
->put (exit
, desc
);
1074 /* Returns the structure describing number of iterations determined from
1075 single dominating exit of DATA->current_loop, or NULL if something
1078 static struct tree_niter_desc
*
1079 niter_for_single_dom_exit (struct ivopts_data
*data
)
1081 edge exit
= single_dom_exit (data
->current_loop
);
1086 return niter_for_exit (data
, exit
);
1089 /* Initializes data structures used by the iv optimization pass, stored
1093 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
1095 data
->version_info_size
= 2 * num_ssa_names
;
1096 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
1097 data
->relevant
= BITMAP_ALLOC (NULL
);
1098 data
->important_candidates
= BITMAP_ALLOC (NULL
);
1099 data
->max_inv_var_id
= 0;
1100 data
->max_inv_expr_id
= 0;
1101 data
->niters
= NULL
;
1102 data
->vgroups
.create (20);
1103 data
->vcands
.create (20);
1104 data
->inv_expr_tab
= new hash_table
<iv_inv_expr_hasher
> (10);
1105 data
->name_expansion_cache
= NULL
;
1106 data
->iv_common_cand_tab
= new hash_table
<iv_common_cand_hasher
> (10);
1107 data
->iv_common_cands
.create (20);
1108 decl_rtl_to_reset
.create (20);
1109 gcc_obstack_init (&data
->iv_obstack
);
1112 /* Returns a memory object to that EXPR points. In case we are able to
1113 determine that it does not point to any such object, NULL is returned. */
1116 determine_base_object (tree expr
)
1118 enum tree_code code
= TREE_CODE (expr
);
1121 /* If this is a pointer casted to any type, we need to determine
1122 the base object for the pointer; so handle conversions before
1123 throwing away non-pointer expressions. */
1124 if (CONVERT_EXPR_P (expr
))
1125 return determine_base_object (TREE_OPERAND (expr
, 0));
1127 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
1136 obj
= TREE_OPERAND (expr
, 0);
1137 base
= get_base_address (obj
);
1142 if (TREE_CODE (base
) == MEM_REF
)
1143 return determine_base_object (TREE_OPERAND (base
, 0));
1145 return fold_convert (ptr_type_node
,
1146 build_fold_addr_expr (base
));
1148 case POINTER_PLUS_EXPR
:
1149 return determine_base_object (TREE_OPERAND (expr
, 0));
1153 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
1157 if (POLY_INT_CST_P (expr
))
1159 return fold_convert (ptr_type_node
, expr
);
1163 /* Return true if address expression with non-DECL_P operand appears
1167 contain_complex_addr_expr (tree expr
)
1172 switch (TREE_CODE (expr
))
1174 case POINTER_PLUS_EXPR
:
1177 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 0));
1178 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 1));
1182 return (!DECL_P (TREE_OPERAND (expr
, 0)));
1191 /* Allocates an induction variable with given initial value BASE and step STEP
1192 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1195 alloc_iv (struct ivopts_data
*data
, tree base
, tree step
,
1196 bool no_overflow
= false)
1199 struct iv
*iv
= (struct iv
*) obstack_alloc (&data
->iv_obstack
,
1200 sizeof (struct iv
));
1201 gcc_assert (step
!= NULL_TREE
);
1203 /* Lower address expression in base except ones with DECL_P as operand.
1205 1) More accurate cost can be computed for address expressions;
1206 2) Duplicate candidates won't be created for bases in different
1207 forms, like &a[0] and &a. */
1209 if ((TREE_CODE (expr
) == ADDR_EXPR
&& !DECL_P (TREE_OPERAND (expr
, 0)))
1210 || contain_complex_addr_expr (expr
))
1213 tree_to_aff_combination (expr
, TREE_TYPE (expr
), &comb
);
1214 base
= fold_convert (TREE_TYPE (base
), aff_combination_to_tree (&comb
));
1218 iv
->base_object
= determine_base_object (base
);
1221 iv
->nonlin_use
= NULL
;
1222 iv
->ssa_name
= NULL_TREE
;
1224 && !iv_can_overflow_p (data
->current_loop
, TREE_TYPE (base
),
1227 iv
->no_overflow
= no_overflow
;
1228 iv
->have_address_use
= false;
1233 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1234 doesn't overflow. */
1237 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
,
1240 struct version_info
*info
= name_info (data
, iv
);
1242 gcc_assert (!info
->iv
);
1244 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
1245 info
->iv
= alloc_iv (data
, base
, step
, no_overflow
);
1246 info
->iv
->ssa_name
= iv
;
1249 /* Finds induction variable declaration for VAR. */
1252 get_iv (struct ivopts_data
*data
, tree var
)
1255 tree type
= TREE_TYPE (var
);
1257 if (!POINTER_TYPE_P (type
)
1258 && !INTEGRAL_TYPE_P (type
))
1261 if (!name_info (data
, var
)->iv
)
1263 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1266 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
1267 set_iv (data
, var
, var
, build_int_cst (type
, 0), true);
1270 return name_info (data
, var
)->iv
;
1273 /* Return the first non-invariant ssa var found in EXPR. */
1276 extract_single_var_from_expr (tree expr
)
1280 enum tree_code code
;
1282 if (!expr
|| is_gimple_min_invariant (expr
))
1285 code
= TREE_CODE (expr
);
1286 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code
)))
1288 n
= TREE_OPERAND_LENGTH (expr
);
1289 for (i
= 0; i
< n
; i
++)
1291 tmp
= extract_single_var_from_expr (TREE_OPERAND (expr
, i
));
1297 return (TREE_CODE (expr
) == SSA_NAME
) ? expr
: NULL
;
1300 /* Finds basic ivs. */
1303 find_bivs (struct ivopts_data
*data
)
1307 tree step
, type
, base
, stop
;
1309 struct loop
*loop
= data
->current_loop
;
1312 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1316 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
1319 if (virtual_operand_p (PHI_RESULT (phi
)))
1322 if (!simple_iv (loop
, loop
, PHI_RESULT (phi
), &iv
, true))
1325 if (integer_zerop (iv
.step
))
1329 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1330 /* Stop expanding iv base at the first ssa var referred by iv step.
1331 Ideally we should stop at any ssa var, because that's expensive
1332 and unusual to happen, we just do it on the first one.
1334 See PR64705 for the rationale. */
1335 stop
= extract_single_var_from_expr (step
);
1336 base
= expand_simple_operations (base
, stop
);
1337 if (contains_abnormal_ssa_name_p (base
)
1338 || contains_abnormal_ssa_name_p (step
))
1341 type
= TREE_TYPE (PHI_RESULT (phi
));
1342 base
= fold_convert (type
, base
);
1345 if (POINTER_TYPE_P (type
))
1346 step
= convert_to_ptrofftype (step
);
1348 step
= fold_convert (type
, step
);
1351 set_iv (data
, PHI_RESULT (phi
), base
, step
, iv
.no_overflow
);
1358 /* Marks basic ivs. */
1361 mark_bivs (struct ivopts_data
*data
)
1366 struct iv
*iv
, *incr_iv
;
1367 struct loop
*loop
= data
->current_loop
;
1368 basic_block incr_bb
;
1371 data
->bivs_not_used_in_addr
= 0;
1372 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1376 iv
= get_iv (data
, PHI_RESULT (phi
));
1380 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1381 def
= SSA_NAME_DEF_STMT (var
);
1382 /* Don't mark iv peeled from other one as biv. */
1384 && gimple_code (def
) == GIMPLE_PHI
1385 && gimple_bb (def
) == loop
->header
)
1388 incr_iv
= get_iv (data
, var
);
1392 /* If the increment is in the subloop, ignore it. */
1393 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1394 if (incr_bb
->loop_father
!= data
->current_loop
1395 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1399 incr_iv
->biv_p
= true;
1400 if (iv
->no_overflow
)
1401 data
->bivs_not_used_in_addr
++;
1402 if (incr_iv
->no_overflow
)
1403 data
->bivs_not_used_in_addr
++;
1407 /* Checks whether STMT defines a linear induction variable and stores its
1408 parameters to IV. */
1411 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple
*stmt
, affine_iv
*iv
)
1414 struct loop
*loop
= data
->current_loop
;
1416 iv
->base
= NULL_TREE
;
1417 iv
->step
= NULL_TREE
;
1419 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1422 lhs
= gimple_assign_lhs (stmt
);
1423 if (TREE_CODE (lhs
) != SSA_NAME
)
1426 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1429 /* Stop expanding iv base at the first ssa var referred by iv step.
1430 Ideally we should stop at any ssa var, because that's expensive
1431 and unusual to happen, we just do it on the first one.
1433 See PR64705 for the rationale. */
1434 stop
= extract_single_var_from_expr (iv
->step
);
1435 iv
->base
= expand_simple_operations (iv
->base
, stop
);
1436 if (contains_abnormal_ssa_name_p (iv
->base
)
1437 || contains_abnormal_ssa_name_p (iv
->step
))
1440 /* If STMT could throw, then do not consider STMT as defining a GIV.
1441 While this will suppress optimizations, we can not safely delete this
1442 GIV and associated statements, even if it appears it is not used. */
1443 if (stmt_could_throw_p (cfun
, stmt
))
1449 /* Finds general ivs in statement STMT. */
1452 find_givs_in_stmt (struct ivopts_data
*data
, gimple
*stmt
)
1456 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1459 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
, iv
.no_overflow
);
1462 /* Finds general ivs in basic block BB. */
1465 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1467 gimple_stmt_iterator bsi
;
1469 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1470 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1473 /* Finds general ivs. */
1476 find_givs (struct ivopts_data
*data
)
1478 struct loop
*loop
= data
->current_loop
;
1479 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1482 for (i
= 0; i
< loop
->num_nodes
; i
++)
1483 find_givs_in_bb (data
, body
[i
]);
1487 /* For each ssa name defined in LOOP determines whether it is an induction
1488 variable and if so, its initial value and step. */
1491 find_induction_variables (struct ivopts_data
*data
)
1496 if (!find_bivs (data
))
1502 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1504 struct tree_niter_desc
*niter
= niter_for_single_dom_exit (data
);
1508 fprintf (dump_file
, " number of iterations ");
1509 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1510 if (!integer_zerop (niter
->may_be_zero
))
1512 fprintf (dump_file
, "; zero if ");
1513 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1515 fprintf (dump_file
, "\n");
1518 fprintf (dump_file
, "\n<Induction Vars>:\n");
1519 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1521 struct version_info
*info
= ver_info (data
, i
);
1522 if (info
->iv
&& info
->iv
->step
&& !integer_zerop (info
->iv
->step
))
1523 dump_iv (dump_file
, ver_info (data
, i
)->iv
, true, 0);
1530 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1531 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1532 is the const offset stripped from IV base and MEM_TYPE is the type
1533 of the memory being addressed. For uses of other types, ADDR_BASE
1534 and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1536 static struct iv_use
*
1537 record_use (struct iv_group
*group
, tree
*use_p
, struct iv
*iv
,
1538 gimple
*stmt
, enum use_type type
, tree mem_type
,
1539 tree addr_base
, poly_uint64 addr_offset
)
1541 struct iv_use
*use
= XCNEW (struct iv_use
);
1543 use
->id
= group
->vuses
.length ();
1544 use
->group_id
= group
->id
;
1546 use
->mem_type
= mem_type
;
1550 use
->addr_base
= addr_base
;
1551 use
->addr_offset
= addr_offset
;
1553 group
->vuses
.safe_push (use
);
1557 /* Checks whether OP is a loop-level invariant and if so, records it.
1558 NONLINEAR_USE is true if the invariant is used in a way we do not
1559 handle specially. */
1562 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1565 struct version_info
*info
;
1567 if (TREE_CODE (op
) != SSA_NAME
1568 || virtual_operand_p (op
))
1571 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1573 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1576 info
= name_info (data
, op
);
1578 info
->has_nonlin_use
|= nonlinear_use
;
1580 info
->inv_id
= ++data
->max_inv_var_id
;
1581 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1584 /* Record a group of TYPE. */
1586 static struct iv_group
*
1587 record_group (struct ivopts_data
*data
, enum use_type type
)
1589 struct iv_group
*group
= XCNEW (struct iv_group
);
1591 group
->id
= data
->vgroups
.length ();
1593 group
->related_cands
= BITMAP_ALLOC (NULL
);
1594 group
->vuses
.create (1);
1596 data
->vgroups
.safe_push (group
);
1600 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1601 New group will be created if there is no existing group for the use.
1602 MEM_TYPE is the type of memory being addressed, or NULL if this
1603 isn't an address reference. */
1605 static struct iv_use
*
1606 record_group_use (struct ivopts_data
*data
, tree
*use_p
,
1607 struct iv
*iv
, gimple
*stmt
, enum use_type type
,
1610 tree addr_base
= NULL
;
1611 struct iv_group
*group
= NULL
;
1612 poly_uint64 addr_offset
= 0;
1614 /* Record non address type use in a new group. */
1615 if (address_p (type
))
1619 addr_base
= strip_offset (iv
->base
, &addr_offset
);
1620 for (i
= 0; i
< data
->vgroups
.length (); i
++)
1624 group
= data
->vgroups
[i
];
1625 use
= group
->vuses
[0];
1626 if (!address_p (use
->type
))
1629 /* Check if it has the same stripped base and step. */
1630 if (operand_equal_p (iv
->base_object
, use
->iv
->base_object
, 0)
1631 && operand_equal_p (iv
->step
, use
->iv
->step
, 0)
1632 && operand_equal_p (addr_base
, use
->addr_base
, 0))
1635 if (i
== data
->vgroups
.length ())
1640 group
= record_group (data
, type
);
1642 return record_use (group
, use_p
, iv
, stmt
, type
, mem_type
,
1643 addr_base
, addr_offset
);
1646 /* Checks whether the use OP is interesting and if so, records it. */
1648 static struct iv_use
*
1649 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1655 if (TREE_CODE (op
) != SSA_NAME
)
1658 iv
= get_iv (data
, op
);
1664 gcc_assert (iv
->nonlin_use
->type
== USE_NONLINEAR_EXPR
);
1665 return iv
->nonlin_use
;
1668 if (integer_zerop (iv
->step
))
1670 record_invariant (data
, op
, true);
1674 stmt
= SSA_NAME_DEF_STMT (op
);
1675 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
|| is_gimple_assign (stmt
));
1677 use
= record_group_use (data
, NULL
, iv
, stmt
, USE_NONLINEAR_EXPR
, NULL_TREE
);
1678 iv
->nonlin_use
= use
;
1682 /* Indicate how compare type iv_use can be handled. */
1683 enum comp_iv_rewrite
1686 /* We may rewrite compare type iv_use by expressing value of the iv_use. */
1688 /* We may rewrite compare type iv_uses on both sides of comparison by
1689 expressing value of each iv_use. */
1691 /* We may rewrite compare type iv_use by expressing value of the iv_use
1692 or by eliminating it with other iv_cand. */
1696 /* Given a condition in statement STMT, checks whether it is a compare
1697 of an induction variable and an invariant. If this is the case,
1698 CONTROL_VAR is set to location of the iv, BOUND to the location of
1699 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1700 induction variable descriptions, and true is returned. If this is not
1701 the case, CONTROL_VAR and BOUND are set to the arguments of the
1702 condition and false is returned. */
1704 static enum comp_iv_rewrite
1705 extract_cond_operands (struct ivopts_data
*data
, gimple
*stmt
,
1706 tree
**control_var
, tree
**bound
,
1707 struct iv
**iv_var
, struct iv
**iv_bound
)
1709 /* The objects returned when COND has constant operands. */
1710 static struct iv const_iv
;
1712 tree
*op0
= &zero
, *op1
= &zero
;
1713 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
;
1714 enum comp_iv_rewrite rewrite_type
= COMP_IV_NA
;
1716 if (gimple_code (stmt
) == GIMPLE_COND
)
1718 gcond
*cond_stmt
= as_a
<gcond
*> (stmt
);
1719 op0
= gimple_cond_lhs_ptr (cond_stmt
);
1720 op1
= gimple_cond_rhs_ptr (cond_stmt
);
1724 op0
= gimple_assign_rhs1_ptr (stmt
);
1725 op1
= gimple_assign_rhs2_ptr (stmt
);
1728 zero
= integer_zero_node
;
1729 const_iv
.step
= integer_zero_node
;
1731 if (TREE_CODE (*op0
) == SSA_NAME
)
1732 iv0
= get_iv (data
, *op0
);
1733 if (TREE_CODE (*op1
) == SSA_NAME
)
1734 iv1
= get_iv (data
, *op1
);
1736 /* If both sides of comparison are IVs. We can express ivs on both end. */
1737 if (iv0
&& iv1
&& !integer_zerop (iv0
->step
) && !integer_zerop (iv1
->step
))
1739 rewrite_type
= COMP_IV_EXPR_2
;
1743 /* If none side of comparison is IV. */
1744 if ((!iv0
|| integer_zerop (iv0
->step
))
1745 && (!iv1
|| integer_zerop (iv1
->step
)))
1748 /* Control variable may be on the other side. */
1749 if (!iv0
|| integer_zerop (iv0
->step
))
1751 std::swap (op0
, op1
);
1752 std::swap (iv0
, iv1
);
1754 /* If one side is IV and the other side isn't loop invariant. */
1756 rewrite_type
= COMP_IV_EXPR
;
1757 /* If one side is IV and the other side is loop invariant. */
1758 else if (!integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
))
1759 rewrite_type
= COMP_IV_ELIM
;
1771 return rewrite_type
;
1774 /* Checks whether the condition in STMT is interesting and if so,
1778 find_interesting_uses_cond (struct ivopts_data
*data
, gimple
*stmt
)
1780 tree
*var_p
, *bound_p
;
1781 struct iv
*var_iv
, *bound_iv
;
1782 enum comp_iv_rewrite ret
;
1784 ret
= extract_cond_operands (data
, stmt
,
1785 &var_p
, &bound_p
, &var_iv
, &bound_iv
);
1786 if (ret
== COMP_IV_NA
)
1788 find_interesting_uses_op (data
, *var_p
);
1789 find_interesting_uses_op (data
, *bound_p
);
1793 record_group_use (data
, var_p
, var_iv
, stmt
, USE_COMPARE
, NULL_TREE
);
1794 /* Record compare type iv_use for iv on the other side of comparison. */
1795 if (ret
== COMP_IV_EXPR_2
)
1796 record_group_use (data
, bound_p
, bound_iv
, stmt
, USE_COMPARE
, NULL_TREE
);
1799 /* Returns the outermost loop EXPR is obviously invariant in
1800 relative to the loop LOOP, i.e. if all its operands are defined
1801 outside of the returned loop. Returns NULL if EXPR is not
1802 even obviously invariant in LOOP. */
1805 outermost_invariant_loop_for_expr (struct loop
*loop
, tree expr
)
1810 if (is_gimple_min_invariant (expr
))
1811 return current_loops
->tree_root
;
1813 if (TREE_CODE (expr
) == SSA_NAME
)
1815 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1818 if (flow_bb_inside_loop_p (loop
, def_bb
))
1820 return superloop_at_depth (loop
,
1821 loop_depth (def_bb
->loop_father
) + 1);
1824 return current_loops
->tree_root
;
1830 unsigned maxdepth
= 0;
1831 len
= TREE_OPERAND_LENGTH (expr
);
1832 for (i
= 0; i
< len
; i
++)
1834 struct loop
*ivloop
;
1835 if (!TREE_OPERAND (expr
, i
))
1838 ivloop
= outermost_invariant_loop_for_expr (loop
, TREE_OPERAND (expr
, i
));
1841 maxdepth
= MAX (maxdepth
, loop_depth (ivloop
));
1844 return superloop_at_depth (loop
, maxdepth
);
1847 /* Returns true if expression EXPR is obviously invariant in LOOP,
1848 i.e. if all its operands are defined outside of the LOOP. LOOP
1849 should not be the function body. */
1852 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1857 gcc_assert (loop_depth (loop
) > 0);
1859 if (is_gimple_min_invariant (expr
))
1862 if (TREE_CODE (expr
) == SSA_NAME
)
1864 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1866 && flow_bb_inside_loop_p (loop
, def_bb
))
1875 len
= TREE_OPERAND_LENGTH (expr
);
1876 for (i
= 0; i
< len
; i
++)
1877 if (TREE_OPERAND (expr
, i
)
1878 && !expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1884 /* Given expression EXPR which computes inductive values with respect
1885 to loop recorded in DATA, this function returns biv from which EXPR
1886 is derived by tracing definition chains of ssa variables in EXPR. */
1889 find_deriving_biv_for_expr (struct ivopts_data
*data
, tree expr
)
1894 enum tree_code code
;
1897 if (expr
== NULL_TREE
)
1900 if (is_gimple_min_invariant (expr
))
1903 code
= TREE_CODE (expr
);
1904 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code
)))
1906 n
= TREE_OPERAND_LENGTH (expr
);
1907 for (i
= 0; i
< n
; i
++)
1909 iv
= find_deriving_biv_for_expr (data
, TREE_OPERAND (expr
, i
));
1915 /* Stop if it's not ssa name. */
1916 if (code
!= SSA_NAME
)
1919 iv
= get_iv (data
, expr
);
1920 if (!iv
|| integer_zerop (iv
->step
))
1925 stmt
= SSA_NAME_DEF_STMT (expr
);
1926 if (gphi
*phi
= dyn_cast
<gphi
*> (stmt
))
1929 use_operand_p use_p
;
1930 basic_block phi_bb
= gimple_bb (phi
);
1932 /* Skip loop header PHI that doesn't define biv. */
1933 if (phi_bb
->loop_father
== data
->current_loop
)
1936 if (virtual_operand_p (gimple_phi_result (phi
)))
1939 FOR_EACH_PHI_ARG (use_p
, phi
, iter
, SSA_OP_USE
)
1941 tree use
= USE_FROM_PTR (use_p
);
1942 iv
= find_deriving_biv_for_expr (data
, use
);
1948 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1951 e1
= gimple_assign_rhs1 (stmt
);
1952 code
= gimple_assign_rhs_code (stmt
);
1953 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
)
1954 return find_deriving_biv_for_expr (data
, e1
);
1961 case POINTER_PLUS_EXPR
:
1962 /* Increments, decrements and multiplications by a constant
1964 e2
= gimple_assign_rhs2 (stmt
);
1965 iv
= find_deriving_biv_for_expr (data
, e2
);
1971 /* Casts are simple. */
1972 return find_deriving_biv_for_expr (data
, e1
);
1981 /* Record BIV, its predecessor and successor that they are used in
1982 address type uses. */
1985 record_biv_for_address_use (struct ivopts_data
*data
, struct iv
*biv
)
1988 tree type
, base_1
, base_2
;
1991 if (!biv
|| !biv
->biv_p
|| integer_zerop (biv
->step
)
1992 || biv
->have_address_use
|| !biv
->no_overflow
)
1995 type
= TREE_TYPE (biv
->base
);
1996 if (!INTEGRAL_TYPE_P (type
))
1999 biv
->have_address_use
= true;
2000 data
->bivs_not_used_in_addr
--;
2001 base_1
= fold_build2 (PLUS_EXPR
, type
, biv
->base
, biv
->step
);
2002 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2004 struct iv
*iv
= ver_info (data
, i
)->iv
;
2006 if (!iv
|| !iv
->biv_p
|| integer_zerop (iv
->step
)
2007 || iv
->have_address_use
|| !iv
->no_overflow
)
2010 if (type
!= TREE_TYPE (iv
->base
)
2011 || !INTEGRAL_TYPE_P (TREE_TYPE (iv
->base
)))
2014 if (!operand_equal_p (biv
->step
, iv
->step
, 0))
2017 base_2
= fold_build2 (PLUS_EXPR
, type
, iv
->base
, iv
->step
);
2018 if (operand_equal_p (base_1
, iv
->base
, 0)
2019 || operand_equal_p (base_2
, biv
->base
, 0))
2021 iv
->have_address_use
= true;
2022 data
->bivs_not_used_in_addr
--;
2027 /* Cumulates the steps of indices into DATA and replaces their values with the
2028 initial ones. Returns false when the value of the index cannot be determined.
2029 Callback for for_each_index. */
2031 struct ifs_ivopts_data
2033 struct ivopts_data
*ivopts_data
;
2039 idx_find_step (tree base
, tree
*idx
, void *data
)
2041 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
2043 bool use_overflow_semantics
= false;
2044 tree step
, iv_base
, iv_step
, lbound
, off
;
2045 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
2047 /* If base is a component ref, require that the offset of the reference
2049 if (TREE_CODE (base
) == COMPONENT_REF
)
2051 off
= component_ref_field_offset (base
);
2052 return expr_invariant_in_loop_p (loop
, off
);
2055 /* If base is array, first check whether we will be able to move the
2056 reference out of the loop (in order to take its address in strength
2057 reduction). In order for this to work we need both lower bound
2058 and step to be loop invariants. */
2059 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
2061 /* Moreover, for a range, the size needs to be invariant as well. */
2062 if (TREE_CODE (base
) == ARRAY_RANGE_REF
2063 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
2066 step
= array_ref_element_size (base
);
2067 lbound
= array_ref_low_bound (base
);
2069 if (!expr_invariant_in_loop_p (loop
, step
)
2070 || !expr_invariant_in_loop_p (loop
, lbound
))
2074 if (TREE_CODE (*idx
) != SSA_NAME
)
2077 iv
= get_iv (dta
->ivopts_data
, *idx
);
2081 /* XXX We produce for a base of *D42 with iv->base being &x[0]
2082 *&x[0], which is not folded and does not trigger the
2083 ARRAY_REF path below. */
2086 if (integer_zerop (iv
->step
))
2089 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
2091 step
= array_ref_element_size (base
);
2093 /* We only handle addresses whose step is an integer constant. */
2094 if (TREE_CODE (step
) != INTEGER_CST
)
2098 /* The step for pointer arithmetics already is 1 byte. */
2099 step
= size_one_node
;
2103 if (iv
->no_overflow
&& nowrap_type_p (TREE_TYPE (iv_step
)))
2104 use_overflow_semantics
= true;
2106 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
2107 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
2108 use_overflow_semantics
))
2110 /* The index might wrap. */
2114 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
2115 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
2117 if (dta
->ivopts_data
->bivs_not_used_in_addr
)
2120 iv
= find_deriving_biv_for_expr (dta
->ivopts_data
, iv
->ssa_name
);
2122 record_biv_for_address_use (dta
->ivopts_data
, iv
);
2127 /* Records use in index IDX. Callback for for_each_index. Ivopts data
2128 object is passed to it in DATA. */
2131 idx_record_use (tree base
, tree
*idx
,
2134 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
2135 find_interesting_uses_op (data
, *idx
);
2136 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
2138 find_interesting_uses_op (data
, array_ref_element_size (base
));
2139 find_interesting_uses_op (data
, array_ref_low_bound (base
));
2144 /* If we can prove that TOP = cst * BOT for some constant cst,
2145 store cst to MUL and return true. Otherwise return false.
2146 The returned value is always sign-extended, regardless of the
2147 signedness of TOP and BOT. */
2150 constant_multiple_of (tree top
, tree bot
, widest_int
*mul
)
2153 enum tree_code code
;
2154 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
2155 widest_int res
, p0
, p1
;
2160 if (operand_equal_p (top
, bot
, 0))
2166 code
= TREE_CODE (top
);
2170 mby
= TREE_OPERAND (top
, 1);
2171 if (TREE_CODE (mby
) != INTEGER_CST
)
2174 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
2177 *mul
= wi::sext (res
* wi::to_widest (mby
), precision
);
2182 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
2183 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
2186 if (code
== MINUS_EXPR
)
2188 *mul
= wi::sext (p0
+ p1
, precision
);
2192 if (TREE_CODE (bot
) != INTEGER_CST
)
2195 p0
= widest_int::from (wi::to_wide (top
), SIGNED
);
2196 p1
= widest_int::from (wi::to_wide (bot
), SIGNED
);
2199 *mul
= wi::sext (wi::divmod_trunc (p0
, p1
, SIGNED
, &res
), precision
);
2203 if (POLY_INT_CST_P (top
)
2204 && POLY_INT_CST_P (bot
)
2205 && constant_multiple_p (wi::to_poly_widest (top
),
2206 wi::to_poly_widest (bot
), mul
))
2213 /* Return true if memory reference REF with step STEP may be unaligned. */
2216 may_be_unaligned_p (tree ref
, tree step
)
2218 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2219 thus they are not misaligned. */
2220 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
2223 unsigned int align
= TYPE_ALIGN (TREE_TYPE (ref
));
2224 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
))) > align
)
2225 align
= GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
)));
2227 unsigned HOST_WIDE_INT bitpos
;
2228 unsigned int ref_align
;
2229 get_object_alignment_1 (ref
, &ref_align
, &bitpos
);
2230 if (ref_align
< align
2231 || (bitpos
% align
) != 0
2232 || (bitpos
% BITS_PER_UNIT
) != 0)
2235 unsigned int trailing_zeros
= tree_ctz (step
);
2236 if (trailing_zeros
< HOST_BITS_PER_INT
2237 && (1U << trailing_zeros
) * BITS_PER_UNIT
< align
)
2243 /* Return true if EXPR may be non-addressable. */
2246 may_be_nonaddressable_p (tree expr
)
2248 switch (TREE_CODE (expr
))
2250 case TARGET_MEM_REF
:
2251 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2252 target, thus they are always addressable. */
2256 /* Likewise for MEM_REFs, modulo the storage order. */
2257 return REF_REVERSE_STORAGE_ORDER (expr
);
2260 if (REF_REVERSE_STORAGE_ORDER (expr
))
2262 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
2265 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr
, 0))))
2267 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
2268 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
2271 case ARRAY_RANGE_REF
:
2272 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr
, 0))))
2274 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
2276 case VIEW_CONVERT_EXPR
:
2277 /* This kind of view-conversions may wrap non-addressable objects
2278 and make them look addressable. After some processing the
2279 non-addressability may be uncovered again, causing ADDR_EXPRs
2280 of inappropriate objects to be built. */
2281 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
2282 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
2284 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
2296 /* Finds addresses in *OP_P inside STMT. */
2299 find_interesting_uses_address (struct ivopts_data
*data
, gimple
*stmt
,
2302 tree base
= *op_p
, step
= size_zero_node
;
2304 struct ifs_ivopts_data ifs_ivopts_data
;
2306 /* Do not play with volatile memory references. A bit too conservative,
2307 perhaps, but safe. */
2308 if (gimple_has_volatile_ops (stmt
))
2311 /* Ignore bitfields for now. Not really something terribly complicated
2313 if (TREE_CODE (base
) == BIT_FIELD_REF
)
2316 base
= unshare_expr (base
);
2318 if (TREE_CODE (base
) == TARGET_MEM_REF
)
2320 tree type
= build_pointer_type (TREE_TYPE (base
));
2324 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
2326 civ
= get_iv (data
, TMR_BASE (base
));
2330 TMR_BASE (base
) = civ
->base
;
2333 if (TMR_INDEX2 (base
)
2334 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
2336 civ
= get_iv (data
, TMR_INDEX2 (base
));
2340 TMR_INDEX2 (base
) = civ
->base
;
2343 if (TMR_INDEX (base
)
2344 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
2346 civ
= get_iv (data
, TMR_INDEX (base
));
2350 TMR_INDEX (base
) = civ
->base
;
2355 if (TMR_STEP (base
))
2356 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
2358 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
2362 if (integer_zerop (step
))
2364 base
= tree_mem_ref_addr (type
, base
);
2368 ifs_ivopts_data
.ivopts_data
= data
;
2369 ifs_ivopts_data
.stmt
= stmt
;
2370 ifs_ivopts_data
.step
= size_zero_node
;
2371 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
2372 || integer_zerop (ifs_ivopts_data
.step
))
2374 step
= ifs_ivopts_data
.step
;
2376 /* Check that the base expression is addressable. This needs
2377 to be done after substituting bases of IVs into it. */
2378 if (may_be_nonaddressable_p (base
))
2381 /* Moreover, on strict alignment platforms, check that it is
2382 sufficiently aligned. */
2383 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
2386 base
= build_fold_addr_expr (base
);
2388 /* Substituting bases of IVs into the base expression might
2389 have caused folding opportunities. */
2390 if (TREE_CODE (base
) == ADDR_EXPR
)
2392 tree
*ref
= &TREE_OPERAND (base
, 0);
2393 while (handled_component_p (*ref
))
2394 ref
= &TREE_OPERAND (*ref
, 0);
2395 if (TREE_CODE (*ref
) == MEM_REF
)
2397 tree tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
2398 TREE_OPERAND (*ref
, 0),
2399 TREE_OPERAND (*ref
, 1));
2406 civ
= alloc_iv (data
, base
, step
);
2407 /* Fail if base object of this memory reference is unknown. */
2408 if (civ
->base_object
== NULL_TREE
)
2411 record_group_use (data
, op_p
, civ
, stmt
, USE_REF_ADDRESS
, TREE_TYPE (*op_p
));
2415 for_each_index (op_p
, idx_record_use
, data
);
2418 /* Finds and records invariants used in STMT. */
2421 find_invariants_stmt (struct ivopts_data
*data
, gimple
*stmt
)
2424 use_operand_p use_p
;
2427 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2429 op
= USE_FROM_PTR (use_p
);
2430 record_invariant (data
, op
, false);
2434 /* CALL calls an internal function. If operand *OP_P will become an
2435 address when the call is expanded, return the type of the memory
2436 being addressed, otherwise return null. */
2439 get_mem_type_for_internal_fn (gcall
*call
, tree
*op_p
)
2441 switch (gimple_call_internal_fn (call
))
2444 if (op_p
== gimple_call_arg_ptr (call
, 0))
2445 return TREE_TYPE (gimple_call_lhs (call
));
2448 case IFN_MASK_STORE
:
2449 if (op_p
== gimple_call_arg_ptr (call
, 0))
2450 return TREE_TYPE (gimple_call_arg (call
, 3));
2458 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2459 Return true if the operand will become an address when STMT
2460 is expanded and record the associated address use if so. */
2463 find_address_like_use (struct ivopts_data
*data
, gimple
*stmt
, tree
*op_p
,
2466 /* Fail if base object of this memory reference is unknown. */
2467 if (iv
->base_object
== NULL_TREE
)
2470 tree mem_type
= NULL_TREE
;
2471 if (gcall
*call
= dyn_cast
<gcall
*> (stmt
))
2472 if (gimple_call_internal_p (call
))
2473 mem_type
= get_mem_type_for_internal_fn (call
, op_p
);
2476 iv
= alloc_iv (data
, iv
->base
, iv
->step
);
2477 record_group_use (data
, op_p
, iv
, stmt
, USE_PTR_ADDRESS
, mem_type
);
2483 /* Finds interesting uses of induction variables in the statement STMT. */
2486 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple
*stmt
)
2489 tree op
, *lhs
, *rhs
;
2491 use_operand_p use_p
;
2492 enum tree_code code
;
2494 find_invariants_stmt (data
, stmt
);
2496 if (gimple_code (stmt
) == GIMPLE_COND
)
2498 find_interesting_uses_cond (data
, stmt
);
2502 if (is_gimple_assign (stmt
))
2504 lhs
= gimple_assign_lhs_ptr (stmt
);
2505 rhs
= gimple_assign_rhs1_ptr (stmt
);
2507 if (TREE_CODE (*lhs
) == SSA_NAME
)
2509 /* If the statement defines an induction variable, the uses are not
2510 interesting by themselves. */
2512 iv
= get_iv (data
, *lhs
);
2514 if (iv
&& !integer_zerop (iv
->step
))
2518 code
= gimple_assign_rhs_code (stmt
);
2519 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
2520 && (REFERENCE_CLASS_P (*rhs
)
2521 || is_gimple_val (*rhs
)))
2523 if (REFERENCE_CLASS_P (*rhs
))
2524 find_interesting_uses_address (data
, stmt
, rhs
);
2526 find_interesting_uses_op (data
, *rhs
);
2528 if (REFERENCE_CLASS_P (*lhs
))
2529 find_interesting_uses_address (data
, stmt
, lhs
);
2532 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
2534 find_interesting_uses_cond (data
, stmt
);
2538 /* TODO -- we should also handle address uses of type
2540 memory = call (whatever);
2547 if (gimple_code (stmt
) == GIMPLE_PHI
2548 && gimple_bb (stmt
) == data
->current_loop
->header
)
2550 iv
= get_iv (data
, PHI_RESULT (stmt
));
2552 if (iv
&& !integer_zerop (iv
->step
))
2556 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2558 op
= USE_FROM_PTR (use_p
);
2560 if (TREE_CODE (op
) != SSA_NAME
)
2563 iv
= get_iv (data
, op
);
2567 if (!find_address_like_use (data
, stmt
, use_p
->use
, iv
))
2568 find_interesting_uses_op (data
, op
);
2572 /* Finds interesting uses of induction variables outside of loops
2573 on loop exit edge EXIT. */
2576 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
2582 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
2585 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
2586 if (!virtual_operand_p (def
))
2587 find_interesting_uses_op (data
, def
);
2591 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2592 mode for memory reference represented by USE. */
2594 static GTY (()) vec
<rtx
, va_gc
> *addr_list
;
2597 addr_offset_valid_p (struct iv_use
*use
, poly_int64 offset
)
2600 unsigned list_index
;
2601 addr_space_t as
= TYPE_ADDR_SPACE (TREE_TYPE (use
->iv
->base
));
2602 machine_mode addr_mode
, mem_mode
= TYPE_MODE (use
->mem_type
);
2604 list_index
= (unsigned) as
* MAX_MACHINE_MODE
+ (unsigned) mem_mode
;
2605 if (list_index
>= vec_safe_length (addr_list
))
2606 vec_safe_grow_cleared (addr_list
, list_index
+ MAX_MACHINE_MODE
);
2608 addr
= (*addr_list
)[list_index
];
2611 addr_mode
= targetm
.addr_space
.address_mode (as
);
2612 reg
= gen_raw_REG (addr_mode
, LAST_VIRTUAL_REGISTER
+ 1);
2613 addr
= gen_rtx_fmt_ee (PLUS
, addr_mode
, reg
, NULL_RTX
);
2614 (*addr_list
)[list_index
] = addr
;
2617 addr_mode
= GET_MODE (addr
);
2619 XEXP (addr
, 1) = gen_int_mode (offset
, addr_mode
);
2620 return (memory_address_addr_space_p (mem_mode
, addr
, as
));
2623 /* Comparison function to sort group in ascending order of addr_offset. */
2626 group_compare_offset (const void *a
, const void *b
)
2628 const struct iv_use
*const *u1
= (const struct iv_use
*const *) a
;
2629 const struct iv_use
*const *u2
= (const struct iv_use
*const *) b
;
2631 return compare_sizes_for_sort ((*u1
)->addr_offset
, (*u2
)->addr_offset
);
2634 /* Check if small groups should be split. Return true if no group
2635 contains more than two uses with distinct addr_offsets. Return
2636 false otherwise. We want to split such groups because:
2638 1) Small groups don't have much benefit and may interfer with
2639 general candidate selection.
2640 2) Size for problem with only small groups is usually small and
2641 general algorithm can handle it well.
2643 TODO -- Above claim may not hold when we want to merge memory
2644 accesses with conseuctive addresses. */
2647 split_small_address_groups_p (struct ivopts_data
*data
)
2649 unsigned int i
, j
, distinct
= 1;
2651 struct iv_group
*group
;
2653 for (i
= 0; i
< data
->vgroups
.length (); i
++)
2655 group
= data
->vgroups
[i
];
2656 if (group
->vuses
.length () == 1)
2659 gcc_assert (address_p (group
->type
));
2660 if (group
->vuses
.length () == 2)
2662 if (compare_sizes_for_sort (group
->vuses
[0]->addr_offset
,
2663 group
->vuses
[1]->addr_offset
) > 0)
2664 std::swap (group
->vuses
[0], group
->vuses
[1]);
2667 group
->vuses
.qsort (group_compare_offset
);
2673 for (pre
= group
->vuses
[0], j
= 1; j
< group
->vuses
.length (); j
++)
2675 if (maybe_ne (group
->vuses
[j
]->addr_offset
, pre
->addr_offset
))
2677 pre
= group
->vuses
[j
];
2686 return (distinct
<= 2);
2689 /* For each group of address type uses, this function further groups
2690 these uses according to the maximum offset supported by target's
2691 [base + offset] addressing mode. */
2694 split_address_groups (struct ivopts_data
*data
)
2697 /* Always split group. */
2698 bool split_p
= split_small_address_groups_p (data
);
2700 for (i
= 0; i
< data
->vgroups
.length (); i
++)
2702 struct iv_group
*new_group
= NULL
;
2703 struct iv_group
*group
= data
->vgroups
[i
];
2704 struct iv_use
*use
= group
->vuses
[0];
2707 use
->group_id
= group
->id
;
2708 if (group
->vuses
.length () == 1)
2711 gcc_assert (address_p (use
->type
));
2713 for (j
= 1; j
< group
->vuses
.length ();)
2715 struct iv_use
*next
= group
->vuses
[j
];
2716 poly_int64 offset
= next
->addr_offset
- use
->addr_offset
;
2718 /* Split group if aksed to, or the offset against the first
2719 use can't fit in offset part of addressing mode. IV uses
2720 having the same offset are still kept in one group. */
2721 if (maybe_ne (offset
, 0)
2722 && (split_p
|| !addr_offset_valid_p (use
, offset
)))
2725 new_group
= record_group (data
, group
->type
);
2726 group
->vuses
.ordered_remove (j
);
2727 new_group
->vuses
.safe_push (next
);
2732 next
->group_id
= group
->id
;
2738 /* Finds uses of the induction variables that are interesting. */
2741 find_interesting_uses (struct ivopts_data
*data
)
2744 gimple_stmt_iterator bsi
;
2745 basic_block
*body
= get_loop_body (data
->current_loop
);
2749 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
2754 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2755 if (e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)
2756 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
2757 find_interesting_uses_outside (data
, e
);
2759 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2760 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2761 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2762 if (!is_gimple_debug (gsi_stmt (bsi
)))
2763 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2767 split_address_groups (data
);
2769 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2771 fprintf (dump_file
, "\n<IV Groups>:\n");
2772 dump_groups (dump_file
, data
);
2773 fprintf (dump_file
, "\n");
2777 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2778 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2779 we are at the top-level of the processed address. */
2782 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
2785 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
2786 enum tree_code code
;
2787 tree type
, orig_type
= TREE_TYPE (expr
);
2788 poly_int64 off0
, off1
;
2790 tree orig_expr
= expr
;
2794 type
= TREE_TYPE (expr
);
2795 code
= TREE_CODE (expr
);
2800 case POINTER_PLUS_EXPR
:
2803 op0
= TREE_OPERAND (expr
, 0);
2804 op1
= TREE_OPERAND (expr
, 1);
2806 op0
= strip_offset_1 (op0
, false, false, &off0
);
2807 op1
= strip_offset_1 (op1
, false, false, &off1
);
2809 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
2810 if (op0
== TREE_OPERAND (expr
, 0)
2811 && op1
== TREE_OPERAND (expr
, 1))
2814 if (integer_zerop (op1
))
2816 else if (integer_zerop (op0
))
2818 if (code
== MINUS_EXPR
)
2819 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
2824 expr
= fold_build2 (code
, type
, op0
, op1
);
2826 return fold_convert (orig_type
, expr
);
2829 op1
= TREE_OPERAND (expr
, 1);
2830 if (!cst_and_fits_in_hwi (op1
))
2833 op0
= TREE_OPERAND (expr
, 0);
2834 op0
= strip_offset_1 (op0
, false, false, &off0
);
2835 if (op0
== TREE_OPERAND (expr
, 0))
2838 *offset
= off0
* int_cst_value (op1
);
2839 if (integer_zerop (op0
))
2842 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
2844 return fold_convert (orig_type
, expr
);
2847 case ARRAY_RANGE_REF
:
2851 step
= array_ref_element_size (expr
);
2852 if (!cst_and_fits_in_hwi (step
))
2855 st
= int_cst_value (step
);
2856 op1
= TREE_OPERAND (expr
, 1);
2857 op1
= strip_offset_1 (op1
, false, false, &off1
);
2858 *offset
= off1
* st
;
2861 && integer_zerop (op1
))
2863 /* Strip the component reference completely. */
2864 op0
= TREE_OPERAND (expr
, 0);
2865 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2878 tmp
= component_ref_field_offset (expr
);
2879 field
= TREE_OPERAND (expr
, 1);
2881 && cst_and_fits_in_hwi (tmp
)
2882 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field
)))
2884 HOST_WIDE_INT boffset
, abs_off
;
2886 /* Strip the component reference completely. */
2887 op0
= TREE_OPERAND (expr
, 0);
2888 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2889 boffset
= int_cst_value (DECL_FIELD_BIT_OFFSET (field
));
2890 abs_off
= abs_hwi (boffset
) / BITS_PER_UNIT
;
2894 *offset
= off0
+ int_cst_value (tmp
) + abs_off
;
2901 op0
= TREE_OPERAND (expr
, 0);
2902 op0
= strip_offset_1 (op0
, true, true, &off0
);
2905 if (op0
== TREE_OPERAND (expr
, 0))
2908 expr
= build_fold_addr_expr (op0
);
2909 return fold_convert (orig_type
, expr
);
2912 /* ??? Offset operand? */
2913 inside_addr
= false;
2917 if (ptrdiff_tree_p (expr
, offset
) && maybe_ne (*offset
, 0))
2918 return build_int_cst (orig_type
, 0);
2922 /* Default handling of expressions for that we want to recurse into
2923 the first operand. */
2924 op0
= TREE_OPERAND (expr
, 0);
2925 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2928 if (op0
== TREE_OPERAND (expr
, 0)
2929 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2932 expr
= copy_node (expr
);
2933 TREE_OPERAND (expr
, 0) = op0
;
2935 TREE_OPERAND (expr
, 1) = op1
;
2937 /* Inside address, we might strip the top level component references,
2938 thus changing type of the expression. Handling of ADDR_EXPR
2940 expr
= fold_convert (orig_type
, expr
);
2945 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2948 strip_offset (tree expr
, poly_uint64_pod
*offset
)
2951 tree core
= strip_offset_1 (expr
, false, false, &off
);
2956 /* Returns variant of TYPE that can be used as base for different uses.
2957 We return unsigned type with the same precision, which avoids problems
2961 generic_type_for (tree type
)
2963 if (POINTER_TYPE_P (type
))
2964 return unsigned_type_for (type
);
2966 if (TYPE_UNSIGNED (type
))
2969 return unsigned_type_for (type
);
2972 /* Private data for walk_tree. */
2974 struct walk_tree_data
2977 struct ivopts_data
*idata
;
2980 /* Callback function for walk_tree, it records invariants and symbol
2981 reference in *EXPR_P. DATA is the structure storing result info. */
2984 find_inv_vars_cb (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2987 struct version_info
*info
;
2988 struct walk_tree_data
*wdata
= (struct walk_tree_data
*) data
;
2990 if (TREE_CODE (op
) != SSA_NAME
)
2993 info
= name_info (wdata
->idata
, op
);
2994 /* Because we expand simple operations when finding IVs, loop invariant
2995 variable that isn't referred by the original loop could be used now.
2996 Record such invariant variables here. */
2999 struct ivopts_data
*idata
= wdata
->idata
;
3000 basic_block bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
3002 if (!bb
|| !flow_bb_inside_loop_p (idata
->current_loop
, bb
))
3004 set_iv (idata
, op
, op
, build_int_cst (TREE_TYPE (op
), 0), true);
3005 record_invariant (idata
, op
, false);
3008 if (!info
->inv_id
|| info
->has_nonlin_use
)
3011 if (!*wdata
->inv_vars
)
3012 *wdata
->inv_vars
= BITMAP_ALLOC (NULL
);
3013 bitmap_set_bit (*wdata
->inv_vars
, info
->inv_id
);
3018 /* Records invariants in *EXPR_P. INV_VARS is the bitmap to that we should
3022 find_inv_vars (struct ivopts_data
*data
, tree
*expr_p
, bitmap
*inv_vars
)
3024 struct walk_tree_data wdata
;
3030 wdata
.inv_vars
= inv_vars
;
3031 walk_tree (expr_p
, find_inv_vars_cb
, &wdata
, NULL
);
3034 /* Get entry from invariant expr hash table for INV_EXPR. New entry
3035 will be recorded if it doesn't exist yet. Given below two exprs:
3036 inv_expr + cst1, inv_expr + cst2
3037 It's hard to make decision whether constant part should be stripped
3038 or not. We choose to not strip based on below facts:
3039 1) We need to count ADD cost for constant part if it's stripped,
3040 which is't always trivial where this functions is called.
3041 2) Stripping constant away may be conflict with following loop
3042 invariant hoisting pass.
3043 3) Not stripping constant away results in more invariant exprs,
3044 which usually leads to decision preferring lower reg pressure. */
3046 static iv_inv_expr_ent
*
3047 get_loop_invariant_expr (struct ivopts_data
*data
, tree inv_expr
)
3049 STRIP_NOPS (inv_expr
);
3051 if (poly_int_tree_p (inv_expr
)
3052 || TREE_CODE (inv_expr
) == SSA_NAME
)
3055 /* Don't strip constant part away as we used to. */
3057 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
3058 struct iv_inv_expr_ent ent
;
3059 ent
.expr
= inv_expr
;
3060 ent
.hash
= iterative_hash_expr (inv_expr
, 0);
3061 struct iv_inv_expr_ent
**slot
= data
->inv_expr_tab
->find_slot (&ent
, INSERT
);
3065 *slot
= XNEW (struct iv_inv_expr_ent
);
3066 (*slot
)->expr
= inv_expr
;
3067 (*slot
)->hash
= ent
.hash
;
3068 (*slot
)->id
= ++data
->max_inv_expr_id
;
3074 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3075 position to POS. If USE is not NULL, the candidate is set as related to
3076 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3077 replacement of the final value of the iv by a direct computation. */
3079 static struct iv_cand
*
3080 add_candidate_1 (struct ivopts_data
*data
,
3081 tree base
, tree step
, bool important
, enum iv_position pos
,
3082 struct iv_use
*use
, gimple
*incremented_at
,
3083 struct iv
*orig_iv
= NULL
)
3086 struct iv_cand
*cand
= NULL
;
3087 tree type
, orig_type
;
3089 gcc_assert (base
&& step
);
3091 /* -fkeep-gc-roots-live means that we have to keep a real pointer
3092 live, but the ivopts code may replace a real pointer with one
3093 pointing before or after the memory block that is then adjusted
3094 into the memory block during the loop. FIXME: It would likely be
3095 better to actually force the pointer live and still use ivopts;
3096 for example, it would be enough to write the pointer into memory
3097 and keep it there until after the loop. */
3098 if (flag_keep_gc_roots_live
&& POINTER_TYPE_P (TREE_TYPE (base
)))
3101 /* For non-original variables, make sure their values are computed in a type
3102 that does not invoke undefined behavior on overflows (since in general,
3103 we cannot prove that these induction variables are non-wrapping). */
3104 if (pos
!= IP_ORIGINAL
)
3106 orig_type
= TREE_TYPE (base
);
3107 type
= generic_type_for (orig_type
);
3108 if (type
!= orig_type
)
3110 base
= fold_convert (type
, base
);
3111 step
= fold_convert (type
, step
);
3115 for (i
= 0; i
< data
->vcands
.length (); i
++)
3117 cand
= data
->vcands
[i
];
3119 if (cand
->pos
!= pos
)
3122 if (cand
->incremented_at
!= incremented_at
3123 || ((pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
3124 && cand
->ainc_use
!= use
))
3127 if (operand_equal_p (base
, cand
->iv
->base
, 0)
3128 && operand_equal_p (step
, cand
->iv
->step
, 0)
3129 && (TYPE_PRECISION (TREE_TYPE (base
))
3130 == TYPE_PRECISION (TREE_TYPE (cand
->iv
->base
))))
3134 if (i
== data
->vcands
.length ())
3136 cand
= XCNEW (struct iv_cand
);
3138 cand
->iv
= alloc_iv (data
, base
, step
);
3140 if (pos
!= IP_ORIGINAL
)
3142 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
3143 cand
->var_after
= cand
->var_before
;
3145 cand
->important
= important
;
3146 cand
->incremented_at
= incremented_at
;
3147 data
->vcands
.safe_push (cand
);
3149 if (!poly_int_tree_p (step
))
3151 find_inv_vars (data
, &step
, &cand
->inv_vars
);
3153 iv_inv_expr_ent
*inv_expr
= get_loop_invariant_expr (data
, step
);
3154 /* Share bitmap between inv_vars and inv_exprs for cand. */
3155 if (inv_expr
!= NULL
)
3157 cand
->inv_exprs
= cand
->inv_vars
;
3158 cand
->inv_vars
= NULL
;
3159 if (cand
->inv_exprs
)
3160 bitmap_clear (cand
->inv_exprs
);
3162 cand
->inv_exprs
= BITMAP_ALLOC (NULL
);
3164 bitmap_set_bit (cand
->inv_exprs
, inv_expr
->id
);
3168 if (pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
3169 cand
->ainc_use
= use
;
3171 cand
->ainc_use
= NULL
;
3173 cand
->orig_iv
= orig_iv
;
3174 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3175 dump_cand (dump_file
, cand
);
3178 cand
->important
|= important
;
3180 /* Relate candidate to the group for which it is added. */
3182 bitmap_set_bit (data
->vgroups
[use
->group_id
]->related_cands
, i
);
3187 /* Returns true if incrementing the induction variable at the end of the LOOP
3190 The purpose is to avoid splitting latch edge with a biv increment, thus
3191 creating a jump, possibly confusing other optimization passes and leaving
3192 less freedom to scheduler. So we allow IP_END only if IP_NORMAL is not
3193 available (so we do not have a better alternative), or if the latch edge
3194 is already nonempty. */
3197 allow_ip_end_pos_p (struct loop
*loop
)
3199 if (!ip_normal_pos (loop
))
3202 if (!empty_block_p (ip_end_pos (loop
)))
3208 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3209 Important field is set to IMPORTANT. */
3212 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
3213 bool important
, struct iv_use
*use
)
3215 basic_block use_bb
= gimple_bb (use
->stmt
);
3216 machine_mode mem_mode
;
3217 unsigned HOST_WIDE_INT cstepi
;
3219 /* If we insert the increment in any position other than the standard
3220 ones, we must ensure that it is incremented once per iteration.
3221 It must not be in an inner nested loop, or one side of an if
3223 if (use_bb
->loop_father
!= data
->current_loop
3224 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
3225 || stmt_can_throw_internal (cfun
, use
->stmt
)
3226 || !cst_and_fits_in_hwi (step
))
3229 cstepi
= int_cst_value (step
);
3231 mem_mode
= TYPE_MODE (use
->mem_type
);
3232 if (((USE_LOAD_PRE_INCREMENT (mem_mode
)
3233 || USE_STORE_PRE_INCREMENT (mem_mode
))
3234 && known_eq (GET_MODE_SIZE (mem_mode
), cstepi
))
3235 || ((USE_LOAD_PRE_DECREMENT (mem_mode
)
3236 || USE_STORE_PRE_DECREMENT (mem_mode
))
3237 && known_eq (GET_MODE_SIZE (mem_mode
), -cstepi
)))
3239 enum tree_code code
= MINUS_EXPR
;
3241 tree new_step
= step
;
3243 if (POINTER_TYPE_P (TREE_TYPE (base
)))
3245 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
3246 code
= POINTER_PLUS_EXPR
;
3249 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
3250 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
3251 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
3254 if (((USE_LOAD_POST_INCREMENT (mem_mode
)
3255 || USE_STORE_POST_INCREMENT (mem_mode
))
3256 && known_eq (GET_MODE_SIZE (mem_mode
), cstepi
))
3257 || ((USE_LOAD_POST_DECREMENT (mem_mode
)
3258 || USE_STORE_POST_DECREMENT (mem_mode
))
3259 && known_eq (GET_MODE_SIZE (mem_mode
), -cstepi
)))
3261 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
3266 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3267 position to POS. If USE is not NULL, the candidate is set as related to
3268 it. The candidate computation is scheduled before exit condition and at
3272 add_candidate (struct ivopts_data
*data
,
3273 tree base
, tree step
, bool important
, struct iv_use
*use
,
3274 struct iv
*orig_iv
= NULL
)
3276 if (ip_normal_pos (data
->current_loop
))
3277 add_candidate_1 (data
, base
, step
, important
,
3278 IP_NORMAL
, use
, NULL
, orig_iv
);
3279 if (ip_end_pos (data
->current_loop
)
3280 && allow_ip_end_pos_p (data
->current_loop
))
3281 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
, orig_iv
);
3284 /* Adds standard iv candidates. */
3287 add_standard_iv_candidates (struct ivopts_data
*data
)
3289 add_candidate (data
, integer_zero_node
, integer_one_node
, true, NULL
);
3291 /* The same for a double-integer type if it is still fast enough. */
3293 (long_integer_type_node
) > TYPE_PRECISION (integer_type_node
)
3294 && TYPE_PRECISION (long_integer_type_node
) <= BITS_PER_WORD
)
3295 add_candidate (data
, build_int_cst (long_integer_type_node
, 0),
3296 build_int_cst (long_integer_type_node
, 1), true, NULL
);
3298 /* The same for a double-integer type if it is still fast enough. */
3300 (long_long_integer_type_node
) > TYPE_PRECISION (long_integer_type_node
)
3301 && TYPE_PRECISION (long_long_integer_type_node
) <= BITS_PER_WORD
)
3302 add_candidate (data
, build_int_cst (long_long_integer_type_node
, 0),
3303 build_int_cst (long_long_integer_type_node
, 1), true, NULL
);
3307 /* Adds candidates bases on the old induction variable IV. */
3310 add_iv_candidate_for_biv (struct ivopts_data
*data
, struct iv
*iv
)
3314 struct iv_cand
*cand
;
3316 /* Check if this biv is used in address type use. */
3317 if (iv
->no_overflow
&& iv
->have_address_use
3318 && INTEGRAL_TYPE_P (TREE_TYPE (iv
->base
))
3319 && TYPE_PRECISION (TREE_TYPE (iv
->base
)) < TYPE_PRECISION (sizetype
))
3321 tree base
= fold_convert (sizetype
, iv
->base
);
3322 tree step
= fold_convert (sizetype
, iv
->step
);
3324 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3325 add_candidate (data
, base
, step
, true, NULL
, iv
);
3326 /* Add iv cand of the original type only if it has nonlinear use. */
3328 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
3331 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
3333 /* The same, but with initial value zero. */
3334 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
3335 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
3337 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
3338 iv
->step
, true, NULL
);
3340 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
3341 if (gimple_code (phi
) == GIMPLE_PHI
)
3343 /* Additionally record the possibility of leaving the original iv
3345 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
3346 /* Don't add candidate if it's from another PHI node because
3347 it's an affine iv appearing in the form of PEELED_CHREC. */
3348 phi
= SSA_NAME_DEF_STMT (def
);
3349 if (gimple_code (phi
) != GIMPLE_PHI
)
3351 cand
= add_candidate_1 (data
,
3352 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
3353 SSA_NAME_DEF_STMT (def
));
3356 cand
->var_before
= iv
->ssa_name
;
3357 cand
->var_after
= def
;
3361 gcc_assert (gimple_bb (phi
) == data
->current_loop
->header
);
3365 /* Adds candidates based on the old induction variables. */
3368 add_iv_candidate_for_bivs (struct ivopts_data
*data
)
3374 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
3376 iv
= ver_info (data
, i
)->iv
;
3377 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
3378 add_iv_candidate_for_biv (data
, iv
);
3382 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3385 record_common_cand (struct ivopts_data
*data
, tree base
,
3386 tree step
, struct iv_use
*use
)
3388 struct iv_common_cand ent
;
3389 struct iv_common_cand
**slot
;
3393 ent
.hash
= iterative_hash_expr (base
, 0);
3394 ent
.hash
= iterative_hash_expr (step
, ent
.hash
);
3396 slot
= data
->iv_common_cand_tab
->find_slot (&ent
, INSERT
);
3399 *slot
= new iv_common_cand ();
3400 (*slot
)->base
= base
;
3401 (*slot
)->step
= step
;
3402 (*slot
)->uses
.create (8);
3403 (*slot
)->hash
= ent
.hash
;
3404 data
->iv_common_cands
.safe_push ((*slot
));
3407 gcc_assert (use
!= NULL
);
3408 (*slot
)->uses
.safe_push (use
);
3412 /* Comparison function used to sort common candidates. */
3415 common_cand_cmp (const void *p1
, const void *p2
)
3418 const struct iv_common_cand
*const *const ccand1
3419 = (const struct iv_common_cand
*const *)p1
;
3420 const struct iv_common_cand
*const *const ccand2
3421 = (const struct iv_common_cand
*const *)p2
;
3423 n1
= (*ccand1
)->uses
.length ();
3424 n2
= (*ccand2
)->uses
.length ();
3428 /* Adds IV candidates based on common candidated recorded. */
3431 add_iv_candidate_derived_from_uses (struct ivopts_data
*data
)
3434 struct iv_cand
*cand_1
, *cand_2
;
3436 data
->iv_common_cands
.qsort (common_cand_cmp
);
3437 for (i
= 0; i
< data
->iv_common_cands
.length (); i
++)
3439 struct iv_common_cand
*ptr
= data
->iv_common_cands
[i
];
3441 /* Only add IV candidate if it's derived from multiple uses. */
3442 if (ptr
->uses
.length () <= 1)
3447 if (ip_normal_pos (data
->current_loop
))
3448 cand_1
= add_candidate_1 (data
, ptr
->base
, ptr
->step
,
3449 false, IP_NORMAL
, NULL
, NULL
);
3451 if (ip_end_pos (data
->current_loop
)
3452 && allow_ip_end_pos_p (data
->current_loop
))
3453 cand_2
= add_candidate_1 (data
, ptr
->base
, ptr
->step
,
3454 false, IP_END
, NULL
, NULL
);
3456 /* Bind deriving uses and the new candidates. */
3457 for (j
= 0; j
< ptr
->uses
.length (); j
++)
3459 struct iv_group
*group
= data
->vgroups
[ptr
->uses
[j
]->group_id
];
3461 bitmap_set_bit (group
->related_cands
, cand_1
->id
);
3463 bitmap_set_bit (group
->related_cands
, cand_2
->id
);
3467 /* Release data since it is useless from this point. */
3468 data
->iv_common_cand_tab
->empty ();
3469 data
->iv_common_cands
.truncate (0);
3472 /* Adds candidates based on the value of USE's iv. */
3475 add_iv_candidate_for_use (struct ivopts_data
*data
, struct iv_use
*use
)
3480 struct iv
*iv
= use
->iv
;
3482 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
3484 /* Record common candidate for use in case it can be shared by others. */
3485 record_common_cand (data
, iv
->base
, iv
->step
, use
);
3487 /* Record common candidate with initial value zero. */
3488 basetype
= TREE_TYPE (iv
->base
);
3489 if (POINTER_TYPE_P (basetype
))
3490 basetype
= sizetype
;
3491 record_common_cand (data
, build_int_cst (basetype
, 0), iv
->step
, use
);
3493 /* Record common candidate with constant offset stripped in base.
3494 Like the use itself, we also add candidate directly for it. */
3495 base
= strip_offset (iv
->base
, &offset
);
3496 if (maybe_ne (offset
, 0U) || base
!= iv
->base
)
3498 record_common_cand (data
, base
, iv
->step
, use
);
3499 add_candidate (data
, base
, iv
->step
, false, use
);
3502 /* Record common candidate with base_object removed in base. */
3505 if (iv
->base_object
!= NULL
&& TREE_CODE (base
) == POINTER_PLUS_EXPR
)
3507 tree step
= iv
->step
;
3510 base
= TREE_OPERAND (base
, 1);
3511 step
= fold_convert (sizetype
, step
);
3512 record_common_cand (data
, base
, step
, use
);
3513 /* Also record common candidate with offset stripped. */
3514 base
= strip_offset (base
, &offset
);
3515 if (maybe_ne (offset
, 0U))
3516 record_common_cand (data
, base
, step
, use
);
3519 /* At last, add auto-incremental candidates. Make such variables
3520 important since other iv uses with same base object may be based
3522 if (use
!= NULL
&& address_p (use
->type
))
3523 add_autoinc_candidates (data
, iv
->base
, iv
->step
, true, use
);
3526 /* Adds candidates based on the uses. */
3529 add_iv_candidate_for_groups (struct ivopts_data
*data
)
3533 /* Only add candidate for the first use in group. */
3534 for (i
= 0; i
< data
->vgroups
.length (); i
++)
3536 struct iv_group
*group
= data
->vgroups
[i
];
3538 gcc_assert (group
->vuses
[0] != NULL
);
3539 add_iv_candidate_for_use (data
, group
->vuses
[0]);
3541 add_iv_candidate_derived_from_uses (data
);
3544 /* Record important candidates and add them to related_cands bitmaps. */
3547 record_important_candidates (struct ivopts_data
*data
)
3550 struct iv_group
*group
;
3552 for (i
= 0; i
< data
->vcands
.length (); i
++)
3554 struct iv_cand
*cand
= data
->vcands
[i
];
3556 if (cand
->important
)
3557 bitmap_set_bit (data
->important_candidates
, i
);
3560 data
->consider_all_candidates
= (data
->vcands
.length ()
3561 <= CONSIDER_ALL_CANDIDATES_BOUND
);
3563 /* Add important candidates to groups' related_cands bitmaps. */
3564 for (i
= 0; i
< data
->vgroups
.length (); i
++)
3566 group
= data
->vgroups
[i
];
3567 bitmap_ior_into (group
->related_cands
, data
->important_candidates
);
3571 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3572 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3573 we allocate a simple list to every use. */
3576 alloc_use_cost_map (struct ivopts_data
*data
)
3578 unsigned i
, size
, s
;
3580 for (i
= 0; i
< data
->vgroups
.length (); i
++)
3582 struct iv_group
*group
= data
->vgroups
[i
];
3584 if (data
->consider_all_candidates
)
3585 size
= data
->vcands
.length ();
3588 s
= bitmap_count_bits (group
->related_cands
);
3590 /* Round up to the power of two, so that moduling by it is fast. */
3591 size
= s
? (1 << ceil_log2 (s
)) : 1;
3594 group
->n_map_members
= size
;
3595 group
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
3599 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3600 on invariants INV_VARS and that the value used in expressing it is
3601 VALUE, and in case of iv elimination the comparison operator is COMP. */
3604 set_group_iv_cost (struct ivopts_data
*data
,
3605 struct iv_group
*group
, struct iv_cand
*cand
,
3606 comp_cost cost
, bitmap inv_vars
, tree value
,
3607 enum tree_code comp
, bitmap inv_exprs
)
3611 if (cost
.infinite_cost_p ())
3613 BITMAP_FREE (inv_vars
);
3614 BITMAP_FREE (inv_exprs
);
3618 if (data
->consider_all_candidates
)
3620 group
->cost_map
[cand
->id
].cand
= cand
;
3621 group
->cost_map
[cand
->id
].cost
= cost
;
3622 group
->cost_map
[cand
->id
].inv_vars
= inv_vars
;
3623 group
->cost_map
[cand
->id
].inv_exprs
= inv_exprs
;
3624 group
->cost_map
[cand
->id
].value
= value
;
3625 group
->cost_map
[cand
->id
].comp
= comp
;
3629 /* n_map_members is a power of two, so this computes modulo. */
3630 s
= cand
->id
& (group
->n_map_members
- 1);
3631 for (i
= s
; i
< group
->n_map_members
; i
++)
3632 if (!group
->cost_map
[i
].cand
)
3634 for (i
= 0; i
< s
; i
++)
3635 if (!group
->cost_map
[i
].cand
)
3641 group
->cost_map
[i
].cand
= cand
;
3642 group
->cost_map
[i
].cost
= cost
;
3643 group
->cost_map
[i
].inv_vars
= inv_vars
;
3644 group
->cost_map
[i
].inv_exprs
= inv_exprs
;
3645 group
->cost_map
[i
].value
= value
;
3646 group
->cost_map
[i
].comp
= comp
;
3649 /* Gets cost of (GROUP, CAND) pair. */
3651 static struct cost_pair
*
3652 get_group_iv_cost (struct ivopts_data
*data
, struct iv_group
*group
,
3653 struct iv_cand
*cand
)
3656 struct cost_pair
*ret
;
3661 if (data
->consider_all_candidates
)
3663 ret
= group
->cost_map
+ cand
->id
;
3670 /* n_map_members is a power of two, so this computes modulo. */
3671 s
= cand
->id
& (group
->n_map_members
- 1);
3672 for (i
= s
; i
< group
->n_map_members
; i
++)
3673 if (group
->cost_map
[i
].cand
== cand
)
3674 return group
->cost_map
+ i
;
3675 else if (group
->cost_map
[i
].cand
== NULL
)
3677 for (i
= 0; i
< s
; i
++)
3678 if (group
->cost_map
[i
].cand
== cand
)
3679 return group
->cost_map
+ i
;
3680 else if (group
->cost_map
[i
].cand
== NULL
)
3686 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3688 produce_memory_decl_rtl (tree obj
, int *regno
)
3690 addr_space_t as
= TYPE_ADDR_SPACE (TREE_TYPE (obj
));
3691 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3695 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
3697 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
3698 x
= gen_rtx_SYMBOL_REF (address_mode
, name
);
3699 SET_SYMBOL_REF_DECL (x
, obj
);
3700 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
3701 set_mem_addr_space (x
, as
);
3702 targetm
.encode_section_info (obj
, x
, true);
3706 x
= gen_raw_REG (address_mode
, (*regno
)++);
3707 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
3708 set_mem_addr_space (x
, as
);
3714 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3715 walk_tree. DATA contains the actual fake register number. */
3718 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
3720 tree obj
= NULL_TREE
;
3722 int *regno
= (int *) data
;
3724 switch (TREE_CODE (*expr_p
))
3727 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
3728 handled_component_p (*expr_p
);
3729 expr_p
= &TREE_OPERAND (*expr_p
, 0))
3732 if (DECL_P (obj
) && HAS_RTL_P (obj
) && !DECL_RTL_SET_P (obj
))
3733 x
= produce_memory_decl_rtl (obj
, regno
);
3738 obj
= SSA_NAME_VAR (*expr_p
);
3739 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3742 if (!DECL_RTL_SET_P (obj
))
3743 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
3752 if (DECL_RTL_SET_P (obj
))
3755 if (DECL_MODE (obj
) == BLKmode
)
3756 x
= produce_memory_decl_rtl (obj
, regno
);
3758 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
3768 decl_rtl_to_reset
.safe_push (obj
);
3769 SET_DECL_RTL (obj
, x
);
3775 /* Determines cost of the computation of EXPR. */
3778 computation_cost (tree expr
, bool speed
)
3782 tree type
= TREE_TYPE (expr
);
3784 /* Avoid using hard regs in ways which may be unsupported. */
3785 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
3786 struct cgraph_node
*node
= cgraph_node::get (current_function_decl
);
3787 enum node_frequency real_frequency
= node
->frequency
;
3789 node
->frequency
= NODE_FREQUENCY_NORMAL
;
3790 crtl
->maybe_hot_insn_p
= speed
;
3791 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
3793 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
3796 default_rtl_profile ();
3797 node
->frequency
= real_frequency
;
3799 cost
= seq_cost (seq
, speed
);
3801 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
),
3802 TYPE_ADDR_SPACE (type
), speed
);
3803 else if (!REG_P (rslt
))
3804 cost
+= set_src_cost (rslt
, TYPE_MODE (type
), speed
);
3809 /* Returns variable containing the value of candidate CAND at statement AT. */
3812 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, gimple
*stmt
)
3814 if (stmt_after_increment (loop
, cand
, stmt
))
3815 return cand
->var_after
;
3817 return cand
->var_before
;
3820 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3821 same precision that is at least as wide as the precision of TYPE, stores
3822 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3826 determine_common_wider_type (tree
*a
, tree
*b
)
3828 tree wider_type
= NULL
;
3830 tree atype
= TREE_TYPE (*a
);
3832 if (CONVERT_EXPR_P (*a
))
3834 suba
= TREE_OPERAND (*a
, 0);
3835 wider_type
= TREE_TYPE (suba
);
3836 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
3842 if (CONVERT_EXPR_P (*b
))
3844 subb
= TREE_OPERAND (*b
, 0);
3845 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
3856 /* Determines the expression by that USE is expressed from induction variable
3857 CAND at statement AT in LOOP. The expression is stored in two parts in a
3858 decomposed form. The invariant part is stored in AFF_INV; while variant
3859 part in AFF_VAR. Store ratio of CAND.step over USE.step in PRAT if it's
3860 non-null. Returns false if USE cannot be expressed using CAND. */
3863 get_computation_aff_1 (struct loop
*loop
, gimple
*at
, struct iv_use
*use
,
3864 struct iv_cand
*cand
, struct aff_tree
*aff_inv
,
3865 struct aff_tree
*aff_var
, widest_int
*prat
= NULL
)
3867 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
3868 tree cbase
= cand
->iv
->base
, cstep
= cand
->iv
->step
;
3869 tree common_type
, uutype
, var
, cstep_common
;
3870 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
3874 /* We must have a precision to express the values of use. */
3875 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3878 var
= var_at_stmt (loop
, cand
, at
);
3879 uutype
= unsigned_type_for (utype
);
3881 /* If the conversion is not noop, perform it. */
3882 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
3884 if (cand
->orig_iv
!= NULL
&& CONVERT_EXPR_P (cbase
)
3885 && (CONVERT_EXPR_P (cstep
) || poly_int_tree_p (cstep
)))
3887 tree inner_base
, inner_step
, inner_type
;
3888 inner_base
= TREE_OPERAND (cbase
, 0);
3889 if (CONVERT_EXPR_P (cstep
))
3890 inner_step
= TREE_OPERAND (cstep
, 0);
3894 inner_type
= TREE_TYPE (inner_base
);
3895 /* If candidate is added from a biv whose type is smaller than
3896 ctype, we know both candidate and the biv won't overflow.
3897 In this case, it's safe to skip the convertion in candidate.
3898 As an example, (unsigned short)((unsigned long)A) equals to
3899 (unsigned short)A, if A has a type no larger than short. */
3900 if (TYPE_PRECISION (inner_type
) <= TYPE_PRECISION (uutype
))
3906 cbase
= fold_convert (uutype
, cbase
);
3907 cstep
= fold_convert (uutype
, cstep
);
3908 var
= fold_convert (uutype
, var
);
3911 /* Ratio is 1 when computing the value of biv cand by itself.
3912 We can't rely on constant_multiple_of in this case because the
3913 use is created after the original biv is selected. The call
3914 could fail because of inconsistent fold behavior. See PR68021
3915 for more information. */
3916 if (cand
->pos
== IP_ORIGINAL
&& cand
->incremented_at
== use
->stmt
)
3918 gcc_assert (is_gimple_assign (use
->stmt
));
3919 gcc_assert (use
->iv
->ssa_name
== cand
->var_after
);
3920 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
3923 else if (!constant_multiple_of (ustep
, cstep
, &rat
))
3929 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3930 type, we achieve better folding by computing their difference in this
3931 wider type, and cast the result to UUTYPE. We do not need to worry about
3932 overflows, as all the arithmetics will in the end be performed in UUTYPE
3934 common_type
= determine_common_wider_type (&ubase
, &cbase
);
3936 /* use = ubase - ratio * cbase + ratio * var. */
3937 tree_to_aff_combination (ubase
, common_type
, aff_inv
);
3938 tree_to_aff_combination (cbase
, common_type
, &aff_cbase
);
3939 tree_to_aff_combination (var
, uutype
, aff_var
);
3941 /* We need to shift the value if we are after the increment. */
3942 if (stmt_after_increment (loop
, cand
, at
))
3946 if (common_type
!= uutype
)
3947 cstep_common
= fold_convert (common_type
, cstep
);
3949 cstep_common
= cstep
;
3951 tree_to_aff_combination (cstep_common
, common_type
, &cstep_aff
);
3952 aff_combination_add (&aff_cbase
, &cstep_aff
);
3955 aff_combination_scale (&aff_cbase
, -rat
);
3956 aff_combination_add (aff_inv
, &aff_cbase
);
3957 if (common_type
!= uutype
)
3958 aff_combination_convert (aff_inv
, uutype
);
3960 aff_combination_scale (aff_var
, rat
);
3964 /* Determines the expression by that USE is expressed from induction variable
3965 CAND at statement AT in LOOP. The expression is stored in a decomposed
3966 form into AFF. Returns false if USE cannot be expressed using CAND. */
3969 get_computation_aff (struct loop
*loop
, gimple
*at
, struct iv_use
*use
,
3970 struct iv_cand
*cand
, struct aff_tree
*aff
)
3974 if (!get_computation_aff_1 (loop
, at
, use
, cand
, aff
, &aff_var
))
3977 aff_combination_add (aff
, &aff_var
);
3981 /* Return the type of USE. */
3984 get_use_type (struct iv_use
*use
)
3986 tree base_type
= TREE_TYPE (use
->iv
->base
);
3989 if (use
->type
== USE_REF_ADDRESS
)
3991 /* The base_type may be a void pointer. Create a pointer type based on
3992 the mem_ref instead. */
3993 type
= build_pointer_type (TREE_TYPE (*use
->op_p
));
3994 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type
))
3995 == TYPE_ADDR_SPACE (TREE_TYPE (base_type
)));
4003 /* Determines the expression by that USE is expressed from induction variable
4004 CAND at statement AT in LOOP. The computation is unshared. */
4007 get_computation_at (struct loop
*loop
, gimple
*at
,
4008 struct iv_use
*use
, struct iv_cand
*cand
)
4011 tree type
= get_use_type (use
);
4013 if (!get_computation_aff (loop
, at
, use
, cand
, &aff
))
4015 unshare_aff_combination (&aff
);
4016 return fold_convert (type
, aff_combination_to_tree (&aff
));
4019 /* Adjust the cost COST for being in loop setup rather than loop body.
4020 If we're optimizing for space, the loop setup overhead is constant;
4021 if we're optimizing for speed, amortize it over the per-iteration cost.
4022 If ROUND_UP_P is true, the result is round up rather than to zero when
4023 optimizing for speed. */
4025 adjust_setup_cost (struct ivopts_data
*data
, unsigned cost
,
4026 bool round_up_p
= false)
4030 else if (optimize_loop_for_speed_p (data
->current_loop
))
4032 HOST_WIDE_INT niters
= avg_loop_niter (data
->current_loop
);
4033 return ((HOST_WIDE_INT
) cost
+ (round_up_p
? niters
- 1 : 0)) / niters
;
4039 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4040 EXPR operand holding the shift. COST0 and COST1 are the costs for
4041 calculating the operands of EXPR. Returns true if successful, and returns
4042 the cost in COST. */
4045 get_shiftadd_cost (tree expr
, scalar_int_mode mode
, comp_cost cost0
,
4046 comp_cost cost1
, tree mult
, bool speed
, comp_cost
*cost
)
4049 tree op1
= TREE_OPERAND (expr
, 1);
4050 tree cst
= TREE_OPERAND (mult
, 1);
4051 tree multop
= TREE_OPERAND (mult
, 0);
4052 int m
= exact_log2 (int_cst_value (cst
));
4053 int maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (mode
));
4054 int as_cost
, sa_cost
;
4057 if (!(m
>= 0 && m
< maxm
))
4061 mult_in_op1
= operand_equal_p (op1
, mult
, 0);
4063 as_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
4065 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4066 use that in preference to a shift insn followed by an add insn. */
4067 sa_cost
= (TREE_CODE (expr
) != MINUS_EXPR
4068 ? shiftadd_cost (speed
, mode
, m
)
4070 ? shiftsub1_cost (speed
, mode
, m
)
4071 : shiftsub0_cost (speed
, mode
, m
)));
4073 res
= comp_cost (MIN (as_cost
, sa_cost
), 0);
4074 res
+= (mult_in_op1
? cost0
: cost1
);
4076 STRIP_NOPS (multop
);
4077 if (!is_gimple_val (multop
))
4078 res
+= force_expr_to_var_cost (multop
, speed
);
4084 /* Estimates cost of forcing expression EXPR into a variable. */
4087 force_expr_to_var_cost (tree expr
, bool speed
)
4089 static bool costs_initialized
= false;
4090 static unsigned integer_cost
[2];
4091 static unsigned symbol_cost
[2];
4092 static unsigned address_cost
[2];
4094 comp_cost cost0
, cost1
, cost
;
4096 scalar_int_mode int_mode
;
4098 if (!costs_initialized
)
4100 tree type
= build_pointer_type (integer_type_node
);
4105 var
= create_tmp_var_raw (integer_type_node
, "test_var");
4106 TREE_STATIC (var
) = 1;
4107 x
= produce_memory_decl_rtl (var
, NULL
);
4108 SET_DECL_RTL (var
, x
);
4110 addr
= build1 (ADDR_EXPR
, type
, var
);
4113 for (i
= 0; i
< 2; i
++)
4115 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
4118 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
4121 = computation_cost (fold_build_pointer_plus_hwi (addr
, 2000), i
) + 1;
4122 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4124 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
4125 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
4126 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
4127 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
4128 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
4129 fprintf (dump_file
, "\n");
4133 costs_initialized
= true;
4138 if (SSA_VAR_P (expr
))
4141 if (is_gimple_min_invariant (expr
))
4143 if (poly_int_tree_p (expr
))
4144 return comp_cost (integer_cost
[speed
], 0);
4146 if (TREE_CODE (expr
) == ADDR_EXPR
)
4148 tree obj
= TREE_OPERAND (expr
, 0);
4151 || TREE_CODE (obj
) == PARM_DECL
4152 || TREE_CODE (obj
) == RESULT_DECL
)
4153 return comp_cost (symbol_cost
[speed
], 0);
4156 return comp_cost (address_cost
[speed
], 0);
4159 switch (TREE_CODE (expr
))
4161 case POINTER_PLUS_EXPR
:
4165 case TRUNC_DIV_EXPR
:
4170 op0
= TREE_OPERAND (expr
, 0);
4171 op1
= TREE_OPERAND (expr
, 1);
4179 op0
= TREE_OPERAND (expr
, 0);
4185 /* Just an arbitrary value, FIXME. */
4186 return comp_cost (target_spill_cost
[speed
], 0);
4189 if (op0
== NULL_TREE
4190 || TREE_CODE (op0
) == SSA_NAME
|| CONSTANT_CLASS_P (op0
))
4193 cost0
= force_expr_to_var_cost (op0
, speed
);
4195 if (op1
== NULL_TREE
4196 || TREE_CODE (op1
) == SSA_NAME
|| CONSTANT_CLASS_P (op1
))
4199 cost1
= force_expr_to_var_cost (op1
, speed
);
4201 mode
= TYPE_MODE (TREE_TYPE (expr
));
4202 switch (TREE_CODE (expr
))
4204 case POINTER_PLUS_EXPR
:
4208 cost
= comp_cost (add_cost (speed
, mode
), 0);
4209 if (TREE_CODE (expr
) != NEGATE_EXPR
)
4211 tree mult
= NULL_TREE
;
4213 if (TREE_CODE (op1
) == MULT_EXPR
)
4215 else if (TREE_CODE (op0
) == MULT_EXPR
)
4218 if (mult
!= NULL_TREE
4219 && is_a
<scalar_int_mode
> (mode
, &int_mode
)
4220 && cst_and_fits_in_hwi (TREE_OPERAND (mult
, 1))
4221 && get_shiftadd_cost (expr
, int_mode
, cost0
, cost1
, mult
,
4229 tree inner_mode
, outer_mode
;
4230 outer_mode
= TREE_TYPE (expr
);
4231 inner_mode
= TREE_TYPE (op0
);
4232 cost
= comp_cost (convert_cost (TYPE_MODE (outer_mode
),
4233 TYPE_MODE (inner_mode
), speed
), 0);
4238 if (cst_and_fits_in_hwi (op0
))
4239 cost
= comp_cost (mult_by_coeff_cost (int_cst_value (op0
),
4241 else if (cst_and_fits_in_hwi (op1
))
4242 cost
= comp_cost (mult_by_coeff_cost (int_cst_value (op1
),
4245 return comp_cost (target_spill_cost
[speed
], 0);
4248 case TRUNC_DIV_EXPR
:
4249 /* Division by power of two is usually cheap, so we allow it. Forbid
4251 if (integer_pow2p (TREE_OPERAND (expr
, 1)))
4252 cost
= comp_cost (add_cost (speed
, mode
), 0);
4254 cost
= comp_cost (target_spill_cost
[speed
], 0);
4262 cost
= comp_cost (add_cost (speed
, mode
), 0);
4274 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4275 invariants the computation depends on. */
4278 force_var_cost (struct ivopts_data
*data
, tree expr
, bitmap
*inv_vars
)
4283 find_inv_vars (data
, &expr
, inv_vars
);
4284 return force_expr_to_var_cost (expr
, data
->speed
);
4287 /* Returns cost of auto-modifying address expression in shape base + offset.
4288 AINC_STEP is step size of the address IV. AINC_OFFSET is offset of the
4289 address expression. The address expression has ADDR_MODE in addr space
4290 AS. The memory access has MEM_MODE. SPEED means we are optimizing for
4295 AINC_PRE_INC
, /* Pre increment. */
4296 AINC_PRE_DEC
, /* Pre decrement. */
4297 AINC_POST_INC
, /* Post increment. */
4298 AINC_POST_DEC
, /* Post decrement. */
4299 AINC_NONE
/* Also the number of auto increment types. */
4302 struct ainc_cost_data
4304 unsigned costs
[AINC_NONE
];
4308 get_address_cost_ainc (poly_int64 ainc_step
, poly_int64 ainc_offset
,
4309 machine_mode addr_mode
, machine_mode mem_mode
,
4310 addr_space_t as
, bool speed
)
4312 if (!USE_LOAD_PRE_DECREMENT (mem_mode
)
4313 && !USE_STORE_PRE_DECREMENT (mem_mode
)
4314 && !USE_LOAD_POST_DECREMENT (mem_mode
)
4315 && !USE_STORE_POST_DECREMENT (mem_mode
)
4316 && !USE_LOAD_PRE_INCREMENT (mem_mode
)
4317 && !USE_STORE_PRE_INCREMENT (mem_mode
)
4318 && !USE_LOAD_POST_INCREMENT (mem_mode
)
4319 && !USE_STORE_POST_INCREMENT (mem_mode
))
4320 return infinite_cost
;
4322 static vec
<ainc_cost_data
*> ainc_cost_data_list
;
4323 unsigned idx
= (unsigned) as
* MAX_MACHINE_MODE
+ (unsigned) mem_mode
;
4324 if (idx
>= ainc_cost_data_list
.length ())
4326 unsigned nsize
= ((unsigned) as
+ 1) *MAX_MACHINE_MODE
;
4328 gcc_assert (nsize
> idx
);
4329 ainc_cost_data_list
.safe_grow_cleared (nsize
);
4332 ainc_cost_data
*data
= ainc_cost_data_list
[idx
];
4335 rtx reg
= gen_raw_REG (addr_mode
, LAST_VIRTUAL_REGISTER
+ 1);
4337 data
= (ainc_cost_data
*) xcalloc (1, sizeof (*data
));
4338 data
->costs
[AINC_PRE_DEC
] = INFTY
;
4339 data
->costs
[AINC_POST_DEC
] = INFTY
;
4340 data
->costs
[AINC_PRE_INC
] = INFTY
;
4341 data
->costs
[AINC_POST_INC
] = INFTY
;
4342 if (USE_LOAD_PRE_DECREMENT (mem_mode
)
4343 || USE_STORE_PRE_DECREMENT (mem_mode
))
4345 rtx addr
= gen_rtx_PRE_DEC (addr_mode
, reg
);
4347 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
4348 data
->costs
[AINC_PRE_DEC
]
4349 = address_cost (addr
, mem_mode
, as
, speed
);
4351 if (USE_LOAD_POST_DECREMENT (mem_mode
)
4352 || USE_STORE_POST_DECREMENT (mem_mode
))
4354 rtx addr
= gen_rtx_POST_DEC (addr_mode
, reg
);
4356 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
4357 data
->costs
[AINC_POST_DEC
]
4358 = address_cost (addr
, mem_mode
, as
, speed
);
4360 if (USE_LOAD_PRE_INCREMENT (mem_mode
)
4361 || USE_STORE_PRE_INCREMENT (mem_mode
))
4363 rtx addr
= gen_rtx_PRE_INC (addr_mode
, reg
);
4365 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
4366 data
->costs
[AINC_PRE_INC
]
4367 = address_cost (addr
, mem_mode
, as
, speed
);
4369 if (USE_LOAD_POST_INCREMENT (mem_mode
)
4370 || USE_STORE_POST_INCREMENT (mem_mode
))
4372 rtx addr
= gen_rtx_POST_INC (addr_mode
, reg
);
4374 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
4375 data
->costs
[AINC_POST_INC
]
4376 = address_cost (addr
, mem_mode
, as
, speed
);
4378 ainc_cost_data_list
[idx
] = data
;
4381 poly_int64 msize
= GET_MODE_SIZE (mem_mode
);
4382 if (known_eq (ainc_offset
, 0) && known_eq (msize
, ainc_step
))
4383 return comp_cost (data
->costs
[AINC_POST_INC
], 0);
4384 if (known_eq (ainc_offset
, 0) && known_eq (msize
, -ainc_step
))
4385 return comp_cost (data
->costs
[AINC_POST_DEC
], 0);
4386 if (known_eq (ainc_offset
, msize
) && known_eq (msize
, ainc_step
))
4387 return comp_cost (data
->costs
[AINC_PRE_INC
], 0);
4388 if (known_eq (ainc_offset
, -msize
) && known_eq (msize
, -ainc_step
))
4389 return comp_cost (data
->costs
[AINC_PRE_DEC
], 0);
4391 return infinite_cost
;
4394 /* Return cost of computing USE's address expression by using CAND.
4395 AFF_INV and AFF_VAR represent invariant and variant parts of the
4396 address expression, respectively. If AFF_INV is simple, store
4397 the loop invariant variables which are depended by it in INV_VARS;
4398 if AFF_INV is complicated, handle it as a new invariant expression
4399 and record it in INV_EXPR. RATIO indicates multiple times between
4400 steps of USE and CAND. If CAN_AUTOINC is nonNULL, store boolean
4401 value to it indicating if this is an auto-increment address. */
4404 get_address_cost (struct ivopts_data
*data
, struct iv_use
*use
,
4405 struct iv_cand
*cand
, aff_tree
*aff_inv
,
4406 aff_tree
*aff_var
, HOST_WIDE_INT ratio
,
4407 bitmap
*inv_vars
, iv_inv_expr_ent
**inv_expr
,
4408 bool *can_autoinc
, bool speed
)
4411 bool simple_inv
= true;
4412 tree comp_inv
= NULL_TREE
, type
= aff_var
->type
;
4413 comp_cost var_cost
= no_cost
, cost
= no_cost
;
4414 struct mem_address parts
= {NULL_TREE
, integer_one_node
,
4415 NULL_TREE
, NULL_TREE
, NULL_TREE
};
4416 machine_mode addr_mode
= TYPE_MODE (type
);
4417 machine_mode mem_mode
= TYPE_MODE (use
->mem_type
);
4418 addr_space_t as
= TYPE_ADDR_SPACE (TREE_TYPE (use
->iv
->base
));
4419 /* Only true if ratio != 1. */
4420 bool ok_with_ratio_p
= false;
4421 bool ok_without_ratio_p
= false;
4423 if (!aff_combination_const_p (aff_inv
))
4425 parts
.index
= integer_one_node
;
4426 /* Addressing mode "base + index". */
4427 ok_without_ratio_p
= valid_mem_ref_p (mem_mode
, as
, &parts
);
4430 parts
.step
= wide_int_to_tree (type
, ratio
);
4431 /* Addressing mode "base + index << scale". */
4432 ok_with_ratio_p
= valid_mem_ref_p (mem_mode
, as
, &parts
);
4433 if (!ok_with_ratio_p
)
4434 parts
.step
= NULL_TREE
;
4436 if (ok_with_ratio_p
|| ok_without_ratio_p
)
4438 if (maybe_ne (aff_inv
->offset
, 0))
4440 parts
.offset
= wide_int_to_tree (sizetype
, aff_inv
->offset
);
4441 /* Addressing mode "base + index [<< scale] + offset". */
4442 if (!valid_mem_ref_p (mem_mode
, as
, &parts
))
4443 parts
.offset
= NULL_TREE
;
4445 aff_inv
->offset
= 0;
4448 move_fixed_address_to_symbol (&parts
, aff_inv
);
4449 /* Base is fixed address and is moved to symbol part. */
4450 if (parts
.symbol
!= NULL_TREE
&& aff_combination_zero_p (aff_inv
))
4451 parts
.base
= NULL_TREE
;
4453 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4454 if (parts
.symbol
!= NULL_TREE
4455 && !valid_mem_ref_p (mem_mode
, as
, &parts
))
4457 aff_combination_add_elt (aff_inv
, parts
.symbol
, 1);
4458 parts
.symbol
= NULL_TREE
;
4459 /* Reset SIMPLE_INV since symbol address needs to be computed
4460 outside of address expression in this case. */
4462 /* Symbol part is moved back to base part, it can't be NULL. */
4463 parts
.base
= integer_one_node
;
4467 parts
.index
= NULL_TREE
;
4471 poly_int64 ainc_step
;
4474 && ptrdiff_tree_p (cand
->iv
->step
, &ainc_step
))
4476 poly_int64 ainc_offset
= (aff_inv
->offset
).force_shwi ();
4478 if (stmt_after_increment (data
->current_loop
, cand
, use
->stmt
))
4479 ainc_offset
+= ainc_step
;
4480 cost
= get_address_cost_ainc (ainc_step
, ainc_offset
,
4481 addr_mode
, mem_mode
, as
, speed
);
4482 if (!cost
.infinite_cost_p ())
4484 *can_autoinc
= true;
4489 if (!aff_combination_zero_p (aff_inv
))
4491 parts
.offset
= wide_int_to_tree (sizetype
, aff_inv
->offset
);
4492 /* Addressing mode "base + offset". */
4493 if (!valid_mem_ref_p (mem_mode
, as
, &parts
))
4494 parts
.offset
= NULL_TREE
;
4496 aff_inv
->offset
= 0;
4501 simple_inv
= (aff_inv
== NULL
4502 || aff_combination_const_p (aff_inv
)
4503 || aff_combination_singleton_var_p (aff_inv
));
4504 if (!aff_combination_zero_p (aff_inv
))
4505 comp_inv
= aff_combination_to_tree (aff_inv
);
4506 if (comp_inv
!= NULL_TREE
)
4507 cost
= force_var_cost (data
, comp_inv
, inv_vars
);
4508 if (ratio
!= 1 && parts
.step
== NULL_TREE
)
4509 var_cost
+= mult_by_coeff_cost (ratio
, addr_mode
, speed
);
4510 if (comp_inv
!= NULL_TREE
&& parts
.index
== NULL_TREE
)
4511 var_cost
+= add_cost (speed
, addr_mode
);
4513 if (comp_inv
&& inv_expr
&& !simple_inv
)
4515 *inv_expr
= get_loop_invariant_expr (data
, comp_inv
);
4516 /* Clear depends on. */
4517 if (*inv_expr
!= NULL
&& inv_vars
&& *inv_vars
)
4518 bitmap_clear (*inv_vars
);
4520 /* Cost of small invariant expression adjusted against loop niters
4521 is usually zero, which makes it difficult to be differentiated
4522 from candidate based on loop invariant variables. Secondly, the
4523 generated invariant expression may not be hoisted out of loop by
4524 following pass. We penalize the cost by rounding up in order to
4525 neutralize such effects. */
4526 cost
.cost
= adjust_setup_cost (data
, cost
.cost
, true);
4527 cost
.scratch
= cost
.cost
;
4531 addr
= addr_for_mem_ref (&parts
, as
, false);
4532 gcc_assert (memory_address_addr_space_p (mem_mode
, addr
, as
));
4533 cost
+= address_cost (addr
, mem_mode
, as
, speed
);
4535 if (parts
.symbol
!= NULL_TREE
)
4536 cost
.complexity
+= 1;
4537 /* Don't increase the complexity of adding a scaled index if it's
4538 the only kind of index that the target allows. */
4539 if (parts
.step
!= NULL_TREE
&& ok_without_ratio_p
)
4540 cost
.complexity
+= 1;
4541 if (parts
.base
!= NULL_TREE
&& parts
.index
!= NULL_TREE
)
4542 cost
.complexity
+= 1;
4543 if (parts
.offset
!= NULL_TREE
&& !integer_zerop (parts
.offset
))
4544 cost
.complexity
+= 1;
4549 /* Scale (multiply) the computed COST (except scratch part that should be
4550 hoisted out a loop) by header->frequency / AT->frequency, which makes
4551 expected cost more accurate. */
4554 get_scaled_computation_cost_at (ivopts_data
*data
, gimple
*at
, comp_cost cost
)
4556 int loop_freq
= data
->current_loop
->header
->count
.to_frequency (cfun
);
4557 int bb_freq
= gimple_bb (at
)->count
.to_frequency (cfun
);
4560 gcc_assert (cost
.scratch
<= cost
.cost
);
4562 = cost
.scratch
+ (cost
.cost
- cost
.scratch
) * bb_freq
/ loop_freq
;
4564 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4565 fprintf (dump_file
, "Scaling cost based on bb prob "
4566 "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4567 1.0f
* bb_freq
/ loop_freq
, cost
.cost
,
4568 cost
.scratch
, scaled_cost
, bb_freq
, loop_freq
);
4570 cost
.cost
= scaled_cost
;
4576 /* Determines the cost of the computation by that USE is expressed
4577 from induction variable CAND. If ADDRESS_P is true, we just need
4578 to create an address from it, otherwise we want to get it into
4579 register. A set of invariants we depend on is stored in INV_VARS.
4580 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4581 addressing is likely. If INV_EXPR is nonnull, record invariant
4582 expr entry in it. */
4585 get_computation_cost (struct ivopts_data
*data
, struct iv_use
*use
,
4586 struct iv_cand
*cand
, bool address_p
, bitmap
*inv_vars
,
4587 bool *can_autoinc
, iv_inv_expr_ent
**inv_expr
)
4589 gimple
*at
= use
->stmt
;
4590 tree ubase
= use
->iv
->base
, cbase
= cand
->iv
->base
;
4591 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
4592 tree comp_inv
= NULL_TREE
;
4593 HOST_WIDE_INT ratio
, aratio
;
4596 aff_tree aff_inv
, aff_var
;
4597 bool speed
= optimize_bb_for_speed_p (gimple_bb (at
));
4602 *can_autoinc
= false;
4606 /* Check if we have enough precision to express the values of use. */
4607 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
4608 return infinite_cost
;
4611 || (use
->iv
->base_object
4612 && cand
->iv
->base_object
4613 && POINTER_TYPE_P (TREE_TYPE (use
->iv
->base_object
))
4614 && POINTER_TYPE_P (TREE_TYPE (cand
->iv
->base_object
))))
4616 /* Do not try to express address of an object with computation based
4617 on address of a different object. This may cause problems in rtl
4618 level alias analysis (that does not expect this to be happening,
4619 as this is illegal in C), and would be unlikely to be useful
4621 if (use
->iv
->base_object
4622 && cand
->iv
->base_object
4623 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
4624 return infinite_cost
;
4627 if (!get_computation_aff_1 (data
->current_loop
, at
, use
,
4628 cand
, &aff_inv
, &aff_var
, &rat
)
4629 || !wi::fits_shwi_p (rat
))
4630 return infinite_cost
;
4632 ratio
= rat
.to_shwi ();
4635 cost
= get_address_cost (data
, use
, cand
, &aff_inv
, &aff_var
, ratio
,
4636 inv_vars
, inv_expr
, can_autoinc
, speed
);
4637 return get_scaled_computation_cost_at (data
, at
, cost
);
4640 bool simple_inv
= (aff_combination_const_p (&aff_inv
)
4641 || aff_combination_singleton_var_p (&aff_inv
));
4642 tree signed_type
= signed_type_for (aff_combination_type (&aff_inv
));
4643 aff_combination_convert (&aff_inv
, signed_type
);
4644 if (!aff_combination_zero_p (&aff_inv
))
4645 comp_inv
= aff_combination_to_tree (&aff_inv
);
4647 cost
= force_var_cost (data
, comp_inv
, inv_vars
);
4648 if (comp_inv
&& inv_expr
&& !simple_inv
)
4650 *inv_expr
= get_loop_invariant_expr (data
, comp_inv
);
4651 /* Clear depends on. */
4652 if (*inv_expr
!= NULL
&& inv_vars
&& *inv_vars
)
4653 bitmap_clear (*inv_vars
);
4655 cost
.cost
= adjust_setup_cost (data
, cost
.cost
);
4656 /* Record setup cost in scratch field. */
4657 cost
.scratch
= cost
.cost
;
4659 /* Cost of constant integer can be covered when adding invariant part to
4661 else if (comp_inv
&& CONSTANT_CLASS_P (comp_inv
))
4664 /* Need type narrowing to represent use with cand. */
4665 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
4667 machine_mode outer_mode
= TYPE_MODE (utype
);
4668 machine_mode inner_mode
= TYPE_MODE (ctype
);
4669 cost
+= comp_cost (convert_cost (outer_mode
, inner_mode
, speed
), 0);
4672 /* Turn a + i * (-c) into a - i * c. */
4673 if (ratio
< 0 && comp_inv
&& !integer_zerop (comp_inv
))
4679 cost
+= mult_by_coeff_cost (aratio
, TYPE_MODE (utype
), speed
);
4681 /* TODO: We may also need to check if we can compute a + i * 4 in one
4683 /* Need to add up the invariant and variant parts. */
4684 if (comp_inv
&& !integer_zerop (comp_inv
))
4685 cost
+= add_cost (speed
, TYPE_MODE (utype
));
4687 return get_scaled_computation_cost_at (data
, at
, cost
);
4690 /* Determines cost of computing the use in GROUP with CAND in a generic
4694 determine_group_iv_cost_generic (struct ivopts_data
*data
,
4695 struct iv_group
*group
, struct iv_cand
*cand
)
4698 iv_inv_expr_ent
*inv_expr
= NULL
;
4699 bitmap inv_vars
= NULL
, inv_exprs
= NULL
;
4700 struct iv_use
*use
= group
->vuses
[0];
4702 /* The simple case first -- if we need to express value of the preserved
4703 original biv, the cost is 0. This also prevents us from counting the
4704 cost of increment twice -- once at this use and once in the cost of
4706 if (cand
->pos
== IP_ORIGINAL
&& cand
->incremented_at
== use
->stmt
)
4709 cost
= get_computation_cost (data
, use
, cand
, false,
4710 &inv_vars
, NULL
, &inv_expr
);
4714 inv_exprs
= BITMAP_ALLOC (NULL
);
4715 bitmap_set_bit (inv_exprs
, inv_expr
->id
);
4717 set_group_iv_cost (data
, group
, cand
, cost
, inv_vars
,
4718 NULL_TREE
, ERROR_MARK
, inv_exprs
);
4719 return !cost
.infinite_cost_p ();
4722 /* Determines cost of computing uses in GROUP with CAND in addresses. */
4725 determine_group_iv_cost_address (struct ivopts_data
*data
,
4726 struct iv_group
*group
, struct iv_cand
*cand
)
4729 bitmap inv_vars
= NULL
, inv_exprs
= NULL
;
4731 iv_inv_expr_ent
*inv_expr
= NULL
;
4732 struct iv_use
*use
= group
->vuses
[0];
4733 comp_cost sum_cost
= no_cost
, cost
;
4735 cost
= get_computation_cost (data
, use
, cand
, true,
4736 &inv_vars
, &can_autoinc
, &inv_expr
);
4740 inv_exprs
= BITMAP_ALLOC (NULL
);
4741 bitmap_set_bit (inv_exprs
, inv_expr
->id
);
4744 if (!sum_cost
.infinite_cost_p () && cand
->ainc_use
== use
)
4747 sum_cost
-= cand
->cost_step
;
4748 /* If we generated the candidate solely for exploiting autoincrement
4749 opportunities, and it turns out it can't be used, set the cost to
4750 infinity to make sure we ignore it. */
4751 else if (cand
->pos
== IP_AFTER_USE
|| cand
->pos
== IP_BEFORE_USE
)
4752 sum_cost
= infinite_cost
;
4755 /* Uses in a group can share setup code, so only add setup cost once. */
4756 cost
-= cost
.scratch
;
4757 /* Compute and add costs for rest uses of this group. */
4758 for (i
= 1; i
< group
->vuses
.length () && !sum_cost
.infinite_cost_p (); i
++)
4760 struct iv_use
*next
= group
->vuses
[i
];
4762 /* TODO: We could skip computing cost for sub iv_use when it has the
4763 same cost as the first iv_use, but the cost really depends on the
4764 offset and where the iv_use is. */
4765 cost
= get_computation_cost (data
, next
, cand
, true,
4766 NULL
, &can_autoinc
, &inv_expr
);
4770 inv_exprs
= BITMAP_ALLOC (NULL
);
4772 bitmap_set_bit (inv_exprs
, inv_expr
->id
);
4776 set_group_iv_cost (data
, group
, cand
, sum_cost
, inv_vars
,
4777 NULL_TREE
, ERROR_MARK
, inv_exprs
);
4779 return !sum_cost
.infinite_cost_p ();
4782 /* Computes value of candidate CAND at position AT in iteration NITER, and
4783 stores it to VAL. */
4786 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, gimple
*at
, tree niter
,
4789 aff_tree step
, delta
, nit
;
4790 struct iv
*iv
= cand
->iv
;
4791 tree type
= TREE_TYPE (iv
->base
);
4793 if (POINTER_TYPE_P (type
))
4794 steptype
= sizetype
;
4796 steptype
= unsigned_type_for (type
);
4798 tree_to_aff_combination (iv
->step
, TREE_TYPE (iv
->step
), &step
);
4799 aff_combination_convert (&step
, steptype
);
4800 tree_to_aff_combination (niter
, TREE_TYPE (niter
), &nit
);
4801 aff_combination_convert (&nit
, steptype
);
4802 aff_combination_mult (&nit
, &step
, &delta
);
4803 if (stmt_after_increment (loop
, cand
, at
))
4804 aff_combination_add (&delta
, &step
);
4806 tree_to_aff_combination (iv
->base
, type
, val
);
4807 if (!POINTER_TYPE_P (type
))
4808 aff_combination_convert (val
, steptype
);
4809 aff_combination_add (val
, &delta
);
4812 /* Returns period of induction variable iv. */
4815 iv_period (struct iv
*iv
)
4817 tree step
= iv
->step
, period
, type
;
4820 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
4822 type
= unsigned_type_for (TREE_TYPE (step
));
4823 /* Period of the iv is lcm (step, type_range)/step -1,
4824 i.e., N*type_range/step - 1. Since type range is power
4825 of two, N == (step >> num_of_ending_zeros_binary (step),
4826 so the final result is
4828 (type_range >> num_of_ending_zeros_binary (step)) - 1
4831 pow2div
= num_ending_zeros (step
);
4833 period
= build_low_bits_mask (type
,
4834 (TYPE_PRECISION (type
)
4835 - tree_to_uhwi (pow2div
)));
4840 /* Returns the comparison operator used when eliminating the iv USE. */
4842 static enum tree_code
4843 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
4845 struct loop
*loop
= data
->current_loop
;
4849 ex_bb
= gimple_bb (use
->stmt
);
4850 exit
= EDGE_SUCC (ex_bb
, 0);
4851 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4852 exit
= EDGE_SUCC (ex_bb
, 1);
4854 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
4857 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4858 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4859 calculation is performed in non-wrapping type.
4861 TODO: More generally, we could test for the situation that
4862 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4863 This would require knowing the sign of OFFSET. */
4866 difference_cannot_overflow_p (struct ivopts_data
*data
, tree base
, tree offset
)
4868 enum tree_code code
;
4870 aff_tree aff_e1
, aff_e2
, aff_offset
;
4872 if (!nowrap_type_p (TREE_TYPE (base
)))
4875 base
= expand_simple_operations (base
);
4877 if (TREE_CODE (base
) == SSA_NAME
)
4879 gimple
*stmt
= SSA_NAME_DEF_STMT (base
);
4881 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
4884 code
= gimple_assign_rhs_code (stmt
);
4885 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4888 e1
= gimple_assign_rhs1 (stmt
);
4889 e2
= gimple_assign_rhs2 (stmt
);
4893 code
= TREE_CODE (base
);
4894 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4896 e1
= TREE_OPERAND (base
, 0);
4897 e2
= TREE_OPERAND (base
, 1);
4900 /* Use affine expansion as deeper inspection to prove the equality. */
4901 tree_to_aff_combination_expand (e2
, TREE_TYPE (e2
),
4902 &aff_e2
, &data
->name_expansion_cache
);
4903 tree_to_aff_combination_expand (offset
, TREE_TYPE (offset
),
4904 &aff_offset
, &data
->name_expansion_cache
);
4905 aff_combination_scale (&aff_offset
, -1);
4909 aff_combination_add (&aff_e2
, &aff_offset
);
4910 if (aff_combination_zero_p (&aff_e2
))
4913 tree_to_aff_combination_expand (e1
, TREE_TYPE (e1
),
4914 &aff_e1
, &data
->name_expansion_cache
);
4915 aff_combination_add (&aff_e1
, &aff_offset
);
4916 return aff_combination_zero_p (&aff_e1
);
4918 case POINTER_PLUS_EXPR
:
4919 aff_combination_add (&aff_e2
, &aff_offset
);
4920 return aff_combination_zero_p (&aff_e2
);
4927 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4928 comparison with CAND. NITER describes the number of iterations of
4929 the loops. If successful, the comparison in COMP_P is altered accordingly.
4931 We aim to handle the following situation:
4947 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4948 We aim to optimize this to
4956 while (p < p_0 - a + b);
4958 This preserves the correctness, since the pointer arithmetics does not
4959 overflow. More precisely:
4961 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4962 overflow in computing it or the values of p.
4963 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4964 overflow. To prove this, we use the fact that p_0 = base + a. */
4967 iv_elimination_compare_lt (struct ivopts_data
*data
,
4968 struct iv_cand
*cand
, enum tree_code
*comp_p
,
4969 struct tree_niter_desc
*niter
)
4971 tree cand_type
, a
, b
, mbz
, nit_type
= TREE_TYPE (niter
->niter
), offset
;
4972 struct aff_tree nit
, tmpa
, tmpb
;
4973 enum tree_code comp
;
4976 /* We need to know that the candidate induction variable does not overflow.
4977 While more complex analysis may be used to prove this, for now just
4978 check that the variable appears in the original program and that it
4979 is computed in a type that guarantees no overflows. */
4980 cand_type
= TREE_TYPE (cand
->iv
->base
);
4981 if (cand
->pos
!= IP_ORIGINAL
|| !nowrap_type_p (cand_type
))
4984 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4985 the calculation of the BOUND could overflow, making the comparison
4987 if (!data
->loop_single_exit_p
)
4990 /* We need to be able to decide whether candidate is increasing or decreasing
4991 in order to choose the right comparison operator. */
4992 if (!cst_and_fits_in_hwi (cand
->iv
->step
))
4994 step
= int_cst_value (cand
->iv
->step
);
4996 /* Check that the number of iterations matches the expected pattern:
4997 a + 1 > b ? 0 : b - a - 1. */
4998 mbz
= niter
->may_be_zero
;
4999 if (TREE_CODE (mbz
) == GT_EXPR
)
5001 /* Handle a + 1 > b. */
5002 tree op0
= TREE_OPERAND (mbz
, 0);
5003 if (TREE_CODE (op0
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op0
, 1)))
5005 a
= TREE_OPERAND (op0
, 0);
5006 b
= TREE_OPERAND (mbz
, 1);
5011 else if (TREE_CODE (mbz
) == LT_EXPR
)
5013 tree op1
= TREE_OPERAND (mbz
, 1);
5015 /* Handle b < a + 1. */
5016 if (TREE_CODE (op1
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op1
, 1)))
5018 a
= TREE_OPERAND (op1
, 0);
5019 b
= TREE_OPERAND (mbz
, 0);
5027 /* Expected number of iterations is B - A - 1. Check that it matches
5028 the actual number, i.e., that B - A - NITER = 1. */
5029 tree_to_aff_combination (niter
->niter
, nit_type
, &nit
);
5030 tree_to_aff_combination (fold_convert (nit_type
, a
), nit_type
, &tmpa
);
5031 tree_to_aff_combination (fold_convert (nit_type
, b
), nit_type
, &tmpb
);
5032 aff_combination_scale (&nit
, -1);
5033 aff_combination_scale (&tmpa
, -1);
5034 aff_combination_add (&tmpb
, &tmpa
);
5035 aff_combination_add (&tmpb
, &nit
);
5036 if (tmpb
.n
!= 0 || maybe_ne (tmpb
.offset
, 1))
5039 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5041 offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (cand
->iv
->step
),
5043 fold_convert (TREE_TYPE (cand
->iv
->step
), a
));
5044 if (!difference_cannot_overflow_p (data
, cand
->iv
->base
, offset
))
5047 /* Determine the new comparison operator. */
5048 comp
= step
< 0 ? GT_EXPR
: LT_EXPR
;
5049 if (*comp_p
== NE_EXPR
)
5051 else if (*comp_p
== EQ_EXPR
)
5052 *comp_p
= invert_tree_comparison (comp
, false);
5059 /* Check whether it is possible to express the condition in USE by comparison
5060 of candidate CAND. If so, store the value compared with to BOUND, and the
5061 comparison operator to COMP. */
5064 may_eliminate_iv (struct ivopts_data
*data
,
5065 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
,
5066 enum tree_code
*comp
)
5071 struct loop
*loop
= data
->current_loop
;
5073 struct tree_niter_desc
*desc
= NULL
;
5075 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
5078 /* For now works only for exits that dominate the loop latch.
5079 TODO: extend to other conditions inside loop body. */
5080 ex_bb
= gimple_bb (use
->stmt
);
5081 if (use
->stmt
!= last_stmt (ex_bb
)
5082 || gimple_code (use
->stmt
) != GIMPLE_COND
5083 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
5086 exit
= EDGE_SUCC (ex_bb
, 0);
5087 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
5088 exit
= EDGE_SUCC (ex_bb
, 1);
5089 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
5092 desc
= niter_for_exit (data
, exit
);
5096 /* Determine whether we can use the variable to test the exit condition.
5097 This is the case iff the period of the induction variable is greater
5098 than the number of iterations for which the exit condition is true. */
5099 period
= iv_period (cand
->iv
);
5101 /* If the number of iterations is constant, compare against it directly. */
5102 if (TREE_CODE (desc
->niter
) == INTEGER_CST
)
5104 /* See cand_value_at. */
5105 if (stmt_after_increment (loop
, cand
, use
->stmt
))
5107 if (!tree_int_cst_lt (desc
->niter
, period
))
5112 if (tree_int_cst_lt (period
, desc
->niter
))
5117 /* If not, and if this is the only possible exit of the loop, see whether
5118 we can get a conservative estimate on the number of iterations of the
5119 entire loop and compare against that instead. */
5122 widest_int period_value
, max_niter
;
5124 max_niter
= desc
->max
;
5125 if (stmt_after_increment (loop
, cand
, use
->stmt
))
5127 period_value
= wi::to_widest (period
);
5128 if (wi::gtu_p (max_niter
, period_value
))
5130 /* See if we can take advantage of inferred loop bound
5132 if (data
->loop_single_exit_p
)
5134 if (!max_loop_iterations (loop
, &max_niter
))
5136 /* The loop bound is already adjusted by adding 1. */
5137 if (wi::gtu_p (max_niter
, period_value
))
5145 cand_value_at (loop
, cand
, use
->stmt
, desc
->niter
, &bnd
);
5147 *bound
= fold_convert (TREE_TYPE (cand
->iv
->base
),
5148 aff_combination_to_tree (&bnd
));
5149 *comp
= iv_elimination_compare (data
, use
);
5151 /* It is unlikely that computing the number of iterations using division
5152 would be more profitable than keeping the original induction variable. */
5153 if (expression_expensive_p (*bound
))
5156 /* Sometimes, it is possible to handle the situation that the number of
5157 iterations may be zero unless additional assumptions by using <
5158 instead of != in the exit condition.
5160 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5161 base the exit condition on it. However, that is often too
5163 if (!integer_zerop (desc
->may_be_zero
))
5164 return iv_elimination_compare_lt (data
, cand
, comp
, desc
);
5169 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5170 be copied, if it is used in the loop body and DATA->body_includes_call. */
5173 parm_decl_cost (struct ivopts_data
*data
, tree bound
)
5175 tree sbound
= bound
;
5176 STRIP_NOPS (sbound
);
5178 if (TREE_CODE (sbound
) == SSA_NAME
5179 && SSA_NAME_IS_DEFAULT_DEF (sbound
)
5180 && TREE_CODE (SSA_NAME_VAR (sbound
)) == PARM_DECL
5181 && data
->body_includes_call
)
5182 return COSTS_N_INSNS (1);
5187 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5190 determine_group_iv_cost_cond (struct ivopts_data
*data
,
5191 struct iv_group
*group
, struct iv_cand
*cand
)
5193 tree bound
= NULL_TREE
;
5195 bitmap inv_exprs
= NULL
;
5196 bitmap inv_vars_elim
= NULL
, inv_vars_express
= NULL
, inv_vars
;
5197 comp_cost elim_cost
= infinite_cost
, express_cost
, cost
, bound_cost
;
5198 enum comp_iv_rewrite rewrite_type
;
5199 iv_inv_expr_ent
*inv_expr_elim
= NULL
, *inv_expr_express
= NULL
, *inv_expr
;
5200 tree
*control_var
, *bound_cst
;
5201 enum tree_code comp
= ERROR_MARK
;
5202 struct iv_use
*use
= group
->vuses
[0];
5204 /* Extract condition operands. */
5205 rewrite_type
= extract_cond_operands (data
, use
->stmt
, &control_var
,
5206 &bound_cst
, NULL
, &cmp_iv
);
5207 gcc_assert (rewrite_type
!= COMP_IV_NA
);
5209 /* Try iv elimination. */
5210 if (rewrite_type
== COMP_IV_ELIM
5211 && may_eliminate_iv (data
, use
, cand
, &bound
, &comp
))
5213 elim_cost
= force_var_cost (data
, bound
, &inv_vars_elim
);
5214 if (elim_cost
.cost
== 0)
5215 elim_cost
.cost
= parm_decl_cost (data
, bound
);
5216 else if (TREE_CODE (bound
) == INTEGER_CST
)
5218 /* If we replace a loop condition 'i < n' with 'p < base + n',
5219 inv_vars_elim will have 'base' and 'n' set, which implies that both
5220 'base' and 'n' will be live during the loop. More likely,
5221 'base + n' will be loop invariant, resulting in only one live value
5222 during the loop. So in that case we clear inv_vars_elim and set
5223 inv_expr_elim instead. */
5224 if (inv_vars_elim
&& bitmap_count_bits (inv_vars_elim
) > 1)
5226 inv_expr_elim
= get_loop_invariant_expr (data
, bound
);
5227 bitmap_clear (inv_vars_elim
);
5229 /* The bound is a loop invariant, so it will be only computed
5231 elim_cost
.cost
= adjust_setup_cost (data
, elim_cost
.cost
);
5234 /* When the condition is a comparison of the candidate IV against
5235 zero, prefer this IV.
5237 TODO: The constant that we're subtracting from the cost should
5238 be target-dependent. This information should be added to the
5239 target costs for each backend. */
5240 if (!elim_cost
.infinite_cost_p () /* Do not try to decrease infinite! */
5241 && integer_zerop (*bound_cst
)
5242 && (operand_equal_p (*control_var
, cand
->var_after
, 0)
5243 || operand_equal_p (*control_var
, cand
->var_before
, 0)))
5246 express_cost
= get_computation_cost (data
, use
, cand
, false,
5247 &inv_vars_express
, NULL
,
5250 find_inv_vars (data
, &cmp_iv
->base
, &inv_vars_express
);
5252 /* Count the cost of the original bound as well. */
5253 bound_cost
= force_var_cost (data
, *bound_cst
, NULL
);
5254 if (bound_cost
.cost
== 0)
5255 bound_cost
.cost
= parm_decl_cost (data
, *bound_cst
);
5256 else if (TREE_CODE (*bound_cst
) == INTEGER_CST
)
5257 bound_cost
.cost
= 0;
5258 express_cost
+= bound_cost
;
5260 /* Choose the better approach, preferring the eliminated IV. */
5261 if (elim_cost
<= express_cost
)
5264 inv_vars
= inv_vars_elim
;
5265 inv_vars_elim
= NULL
;
5266 inv_expr
= inv_expr_elim
;
5270 cost
= express_cost
;
5271 inv_vars
= inv_vars_express
;
5272 inv_vars_express
= NULL
;
5275 inv_expr
= inv_expr_express
;
5280 inv_exprs
= BITMAP_ALLOC (NULL
);
5281 bitmap_set_bit (inv_exprs
, inv_expr
->id
);
5283 set_group_iv_cost (data
, group
, cand
, cost
,
5284 inv_vars
, bound
, comp
, inv_exprs
);
5287 BITMAP_FREE (inv_vars_elim
);
5288 if (inv_vars_express
)
5289 BITMAP_FREE (inv_vars_express
);
5291 return !cost
.infinite_cost_p ();
5294 /* Determines cost of computing uses in GROUP with CAND. Returns false
5295 if USE cannot be represented with CAND. */
5298 determine_group_iv_cost (struct ivopts_data
*data
,
5299 struct iv_group
*group
, struct iv_cand
*cand
)
5301 switch (group
->type
)
5303 case USE_NONLINEAR_EXPR
:
5304 return determine_group_iv_cost_generic (data
, group
, cand
);
5306 case USE_REF_ADDRESS
:
5307 case USE_PTR_ADDRESS
:
5308 return determine_group_iv_cost_address (data
, group
, cand
);
5311 return determine_group_iv_cost_cond (data
, group
, cand
);
5318 /* Return true if get_computation_cost indicates that autoincrement is
5319 a possibility for the pair of USE and CAND, false otherwise. */
5322 autoinc_possible_for_pair (struct ivopts_data
*data
, struct iv_use
*use
,
5323 struct iv_cand
*cand
)
5325 if (!address_p (use
->type
))
5328 bool can_autoinc
= false;
5329 get_computation_cost (data
, use
, cand
, true, NULL
, &can_autoinc
, NULL
);
5333 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5334 use that allows autoincrement, and set their AINC_USE if possible. */
5337 set_autoinc_for_original_candidates (struct ivopts_data
*data
)
5341 for (i
= 0; i
< data
->vcands
.length (); i
++)
5343 struct iv_cand
*cand
= data
->vcands
[i
];
5344 struct iv_use
*closest_before
= NULL
;
5345 struct iv_use
*closest_after
= NULL
;
5346 if (cand
->pos
!= IP_ORIGINAL
)
5349 for (j
= 0; j
< data
->vgroups
.length (); j
++)
5351 struct iv_group
*group
= data
->vgroups
[j
];
5352 struct iv_use
*use
= group
->vuses
[0];
5353 unsigned uid
= gimple_uid (use
->stmt
);
5355 if (gimple_bb (use
->stmt
) != gimple_bb (cand
->incremented_at
))
5358 if (uid
< gimple_uid (cand
->incremented_at
)
5359 && (closest_before
== NULL
5360 || uid
> gimple_uid (closest_before
->stmt
)))
5361 closest_before
= use
;
5363 if (uid
> gimple_uid (cand
->incremented_at
)
5364 && (closest_after
== NULL
5365 || uid
< gimple_uid (closest_after
->stmt
)))
5366 closest_after
= use
;
5369 if (closest_before
!= NULL
5370 && autoinc_possible_for_pair (data
, closest_before
, cand
))
5371 cand
->ainc_use
= closest_before
;
5372 else if (closest_after
!= NULL
5373 && autoinc_possible_for_pair (data
, closest_after
, cand
))
5374 cand
->ainc_use
= closest_after
;
5378 /* Relate compare use with all candidates. */
5381 relate_compare_use_with_all_cands (struct ivopts_data
*data
)
5383 unsigned i
, count
= data
->vcands
.length ();
5384 for (i
= 0; i
< data
->vgroups
.length (); i
++)
5386 struct iv_group
*group
= data
->vgroups
[i
];
5388 if (group
->type
== USE_COMPARE
)
5389 bitmap_set_range (group
->related_cands
, 0, count
);
5393 /* Finds the candidates for the induction variables. */
5396 find_iv_candidates (struct ivopts_data
*data
)
5398 /* Add commonly used ivs. */
5399 add_standard_iv_candidates (data
);
5401 /* Add old induction variables. */
5402 add_iv_candidate_for_bivs (data
);
5404 /* Add induction variables derived from uses. */
5405 add_iv_candidate_for_groups (data
);
5407 set_autoinc_for_original_candidates (data
);
5409 /* Record the important candidates. */
5410 record_important_candidates (data
);
5412 /* Relate compare iv_use with all candidates. */
5413 if (!data
->consider_all_candidates
)
5414 relate_compare_use_with_all_cands (data
);
5416 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5420 fprintf (dump_file
, "\n<Important Candidates>:\t");
5421 for (i
= 0; i
< data
->vcands
.length (); i
++)
5422 if (data
->vcands
[i
]->important
)
5423 fprintf (dump_file
, " %d,", data
->vcands
[i
]->id
);
5424 fprintf (dump_file
, "\n");
5426 fprintf (dump_file
, "\n<Group, Cand> Related:\n");
5427 for (i
= 0; i
< data
->vgroups
.length (); i
++)
5429 struct iv_group
*group
= data
->vgroups
[i
];
5431 if (group
->related_cands
)
5433 fprintf (dump_file
, " Group %d:\t", group
->id
);
5434 dump_bitmap (dump_file
, group
->related_cands
);
5437 fprintf (dump_file
, "\n");
5441 /* Determines costs of computing use of iv with an iv candidate. */
5444 determine_group_iv_costs (struct ivopts_data
*data
)
5447 struct iv_cand
*cand
;
5448 struct iv_group
*group
;
5449 bitmap to_clear
= BITMAP_ALLOC (NULL
);
5451 alloc_use_cost_map (data
);
5453 for (i
= 0; i
< data
->vgroups
.length (); i
++)
5455 group
= data
->vgroups
[i
];
5457 if (data
->consider_all_candidates
)
5459 for (j
= 0; j
< data
->vcands
.length (); j
++)
5461 cand
= data
->vcands
[j
];
5462 determine_group_iv_cost (data
, group
, cand
);
5469 EXECUTE_IF_SET_IN_BITMAP (group
->related_cands
, 0, j
, bi
)
5471 cand
= data
->vcands
[j
];
5472 if (!determine_group_iv_cost (data
, group
, cand
))
5473 bitmap_set_bit (to_clear
, j
);
5476 /* Remove the candidates for that the cost is infinite from
5477 the list of related candidates. */
5478 bitmap_and_compl_into (group
->related_cands
, to_clear
);
5479 bitmap_clear (to_clear
);
5483 BITMAP_FREE (to_clear
);
5485 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5489 /* Dump invariant variables. */
5490 fprintf (dump_file
, "\n<Invariant Vars>:\n");
5491 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
5493 struct version_info
*info
= ver_info (data
, i
);
5496 fprintf (dump_file
, "Inv %d:\t", info
->inv_id
);
5497 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
5498 fprintf (dump_file
, "%s\n",
5499 info
->has_nonlin_use
? "" : "\t(eliminable)");
5503 /* Dump invariant expressions. */
5504 fprintf (dump_file
, "\n<Invariant Expressions>:\n");
5505 auto_vec
<iv_inv_expr_ent
*> list (data
->inv_expr_tab
->elements ());
5507 for (hash_table
<iv_inv_expr_hasher
>::iterator it
5508 = data
->inv_expr_tab
->begin (); it
!= data
->inv_expr_tab
->end ();
5510 list
.safe_push (*it
);
5512 list
.qsort (sort_iv_inv_expr_ent
);
5514 for (i
= 0; i
< list
.length (); ++i
)
5516 fprintf (dump_file
, "inv_expr %d: \t", list
[i
]->id
);
5517 print_generic_expr (dump_file
, list
[i
]->expr
, TDF_SLIM
);
5518 fprintf (dump_file
, "\n");
5521 fprintf (dump_file
, "\n<Group-candidate Costs>:\n");
5523 for (i
= 0; i
< data
->vgroups
.length (); i
++)
5525 group
= data
->vgroups
[i
];
5527 fprintf (dump_file
, "Group %d:\n", i
);
5528 fprintf (dump_file
, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5529 for (j
= 0; j
< group
->n_map_members
; j
++)
5531 if (!group
->cost_map
[j
].cand
5532 || group
->cost_map
[j
].cost
.infinite_cost_p ())
5535 fprintf (dump_file
, " %d\t%d\t%d\t",
5536 group
->cost_map
[j
].cand
->id
,
5537 group
->cost_map
[j
].cost
.cost
,
5538 group
->cost_map
[j
].cost
.complexity
);
5539 if (!group
->cost_map
[j
].inv_exprs
5540 || bitmap_empty_p (group
->cost_map
[j
].inv_exprs
))
5541 fprintf (dump_file
, "NIL;\t");
5543 bitmap_print (dump_file
,
5544 group
->cost_map
[j
].inv_exprs
, "", ";\t");
5545 if (!group
->cost_map
[j
].inv_vars
5546 || bitmap_empty_p (group
->cost_map
[j
].inv_vars
))
5547 fprintf (dump_file
, "NIL;\n");
5549 bitmap_print (dump_file
,
5550 group
->cost_map
[j
].inv_vars
, "", "\n");
5553 fprintf (dump_file
, "\n");
5555 fprintf (dump_file
, "\n");
5559 /* Determines cost of the candidate CAND. */
5562 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
5564 comp_cost cost_base
;
5565 unsigned cost
, cost_step
;
5568 gcc_assert (cand
->iv
!= NULL
);
5570 /* There are two costs associated with the candidate -- its increment
5571 and its initialization. The second is almost negligible for any loop
5572 that rolls enough, so we take it just very little into account. */
5574 base
= cand
->iv
->base
;
5575 cost_base
= force_var_cost (data
, base
, NULL
);
5576 /* It will be exceptional that the iv register happens to be initialized with
5577 the proper value at no cost. In general, there will at least be a regcopy
5579 if (cost_base
.cost
== 0)
5580 cost_base
.cost
= COSTS_N_INSNS (1);
5581 cost_step
= add_cost (data
->speed
, TYPE_MODE (TREE_TYPE (base
)));
5583 cost
= cost_step
+ adjust_setup_cost (data
, cost_base
.cost
);
5585 /* Prefer the original ivs unless we may gain something by replacing it.
5586 The reason is to make debugging simpler; so this is not relevant for
5587 artificial ivs created by other optimization passes. */
5588 if (cand
->pos
!= IP_ORIGINAL
5589 || !SSA_NAME_VAR (cand
->var_before
)
5590 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
5593 /* Prefer not to insert statements into latch unless there are some
5594 already (so that we do not create unnecessary jumps). */
5595 if (cand
->pos
== IP_END
5596 && empty_block_p (ip_end_pos (data
->current_loop
)))
5600 cand
->cost_step
= cost_step
;
5603 /* Determines costs of computation of the candidates. */
5606 determine_iv_costs (struct ivopts_data
*data
)
5610 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5612 fprintf (dump_file
, "<Candidate Costs>:\n");
5613 fprintf (dump_file
, " cand\tcost\n");
5616 for (i
= 0; i
< data
->vcands
.length (); i
++)
5618 struct iv_cand
*cand
= data
->vcands
[i
];
5620 determine_iv_cost (data
, cand
);
5622 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5623 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
5626 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5627 fprintf (dump_file
, "\n");
5630 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5631 induction variables. Note N_INVS includes both invariant variables and
5632 invariant expressions. */
5635 ivopts_estimate_reg_pressure (struct ivopts_data
*data
, unsigned n_invs
,
5639 unsigned n_old
= data
->regs_used
, n_new
= n_invs
+ n_cands
;
5640 unsigned regs_needed
= n_new
+ n_old
, available_regs
= target_avail_regs
;
5641 bool speed
= data
->speed
;
5643 /* If there is a call in the loop body, the call-clobbered registers
5644 are not available for loop invariants. */
5645 if (data
->body_includes_call
)
5646 available_regs
= available_regs
- target_clobbered_regs
;
5648 /* If we have enough registers. */
5649 if (regs_needed
+ target_res_regs
< available_regs
)
5651 /* If close to running out of registers, try to preserve them. */
5652 else if (regs_needed
<= available_regs
)
5653 cost
= target_reg_cost
[speed
] * regs_needed
;
5654 /* If we run out of available registers but the number of candidates
5655 does not, we penalize extra registers using target_spill_cost. */
5656 else if (n_cands
<= available_regs
)
5657 cost
= target_reg_cost
[speed
] * available_regs
5658 + target_spill_cost
[speed
] * (regs_needed
- available_regs
);
5659 /* If the number of candidates runs out available registers, we penalize
5660 extra candidate registers using target_spill_cost * 2. Because it is
5661 more expensive to spill induction variable than invariant. */
5663 cost
= target_reg_cost
[speed
] * available_regs
5664 + target_spill_cost
[speed
] * (n_cands
- available_regs
) * 2
5665 + target_spill_cost
[speed
] * (regs_needed
- n_cands
);
5667 /* Finally, add the number of candidates, so that we prefer eliminating
5668 induction variables if possible. */
5669 return cost
+ n_cands
;
5672 /* For each size of the induction variable set determine the penalty. */
5675 determine_set_costs (struct ivopts_data
*data
)
5681 struct loop
*loop
= data
->current_loop
;
5684 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5686 fprintf (dump_file
, "<Global Costs>:\n");
5687 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
5688 fprintf (dump_file
, " target_clobbered_regs %d\n", target_clobbered_regs
);
5689 fprintf (dump_file
, " target_reg_cost %d\n", target_reg_cost
[data
->speed
]);
5690 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
[data
->speed
]);
5694 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
5697 op
= PHI_RESULT (phi
);
5699 if (virtual_operand_p (op
))
5702 if (get_iv (data
, op
))
5705 if (!POINTER_TYPE_P (TREE_TYPE (op
))
5706 && !INTEGRAL_TYPE_P (TREE_TYPE (op
)))
5712 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5714 struct version_info
*info
= ver_info (data
, j
);
5716 if (info
->inv_id
&& info
->has_nonlin_use
)
5720 data
->regs_used
= n
;
5721 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5722 fprintf (dump_file
, " regs_used %d\n", n
);
5724 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5726 fprintf (dump_file
, " cost for size:\n");
5727 fprintf (dump_file
, " ivs\tcost\n");
5728 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
5729 fprintf (dump_file
, " %d\t%d\n", j
,
5730 ivopts_estimate_reg_pressure (data
, 0, j
));
5731 fprintf (dump_file
, "\n");
5735 /* Returns true if A is a cheaper cost pair than B. */
5738 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
5746 if (a
->cost
< b
->cost
)
5749 if (b
->cost
< a
->cost
)
5752 /* In case the costs are the same, prefer the cheaper candidate. */
5753 if (a
->cand
->cost
< b
->cand
->cost
)
5759 /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1
5760 for more expensive, equal and cheaper respectively. */
5763 compare_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
5765 if (cheaper_cost_pair (a
, b
))
5767 if (cheaper_cost_pair (b
, a
))
5773 /* Returns candidate by that USE is expressed in IVS. */
5775 static struct cost_pair
*
5776 iv_ca_cand_for_group (struct iv_ca
*ivs
, struct iv_group
*group
)
5778 return ivs
->cand_for_group
[group
->id
];
5781 /* Computes the cost field of IVS structure. */
5784 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5786 comp_cost cost
= ivs
->cand_use_cost
;
5788 cost
+= ivs
->cand_cost
;
5789 cost
+= ivopts_estimate_reg_pressure (data
, ivs
->n_invs
, ivs
->n_cands
);
5793 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
5797 iv_ca_set_remove_invs (struct iv_ca
*ivs
, bitmap invs
, unsigned *n_inv_uses
)
5805 gcc_assert (n_inv_uses
!= NULL
);
5806 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5809 if (n_inv_uses
[iid
] == 0)
5814 /* Set USE not to be expressed by any candidate in IVS. */
5817 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5818 struct iv_group
*group
)
5820 unsigned gid
= group
->id
, cid
;
5821 struct cost_pair
*cp
;
5823 cp
= ivs
->cand_for_group
[gid
];
5829 ivs
->cand_for_group
[gid
] = NULL
;
5830 ivs
->n_cand_uses
[cid
]--;
5832 if (ivs
->n_cand_uses
[cid
] == 0)
5834 bitmap_clear_bit (ivs
->cands
, cid
);
5836 ivs
->cand_cost
-= cp
->cand
->cost
;
5837 iv_ca_set_remove_invs (ivs
, cp
->cand
->inv_vars
, ivs
->n_inv_var_uses
);
5838 iv_ca_set_remove_invs (ivs
, cp
->cand
->inv_exprs
, ivs
->n_inv_expr_uses
);
5841 ivs
->cand_use_cost
-= cp
->cost
;
5842 iv_ca_set_remove_invs (ivs
, cp
->inv_vars
, ivs
->n_inv_var_uses
);
5843 iv_ca_set_remove_invs (ivs
, cp
->inv_exprs
, ivs
->n_inv_expr_uses
);
5844 iv_ca_recount_cost (data
, ivs
);
5847 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
5851 iv_ca_set_add_invs (struct iv_ca
*ivs
, bitmap invs
, unsigned *n_inv_uses
)
5859 gcc_assert (n_inv_uses
!= NULL
);
5860 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5863 if (n_inv_uses
[iid
] == 1)
5868 /* Set cost pair for GROUP in set IVS to CP. */
5871 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5872 struct iv_group
*group
, struct cost_pair
*cp
)
5874 unsigned gid
= group
->id
, cid
;
5876 if (ivs
->cand_for_group
[gid
] == cp
)
5879 if (ivs
->cand_for_group
[gid
])
5880 iv_ca_set_no_cp (data
, ivs
, group
);
5887 ivs
->cand_for_group
[gid
] = cp
;
5888 ivs
->n_cand_uses
[cid
]++;
5889 if (ivs
->n_cand_uses
[cid
] == 1)
5891 bitmap_set_bit (ivs
->cands
, cid
);
5893 ivs
->cand_cost
+= cp
->cand
->cost
;
5894 iv_ca_set_add_invs (ivs
, cp
->cand
->inv_vars
, ivs
->n_inv_var_uses
);
5895 iv_ca_set_add_invs (ivs
, cp
->cand
->inv_exprs
, ivs
->n_inv_expr_uses
);
5898 ivs
->cand_use_cost
+= cp
->cost
;
5899 iv_ca_set_add_invs (ivs
, cp
->inv_vars
, ivs
->n_inv_var_uses
);
5900 iv_ca_set_add_invs (ivs
, cp
->inv_exprs
, ivs
->n_inv_expr_uses
);
5901 iv_ca_recount_cost (data
, ivs
);
5905 /* Extend set IVS by expressing USE by some of the candidates in it
5906 if possible. Consider all important candidates if candidates in
5907 set IVS don't give any result. */
5910 iv_ca_add_group (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5911 struct iv_group
*group
)
5913 struct cost_pair
*best_cp
= NULL
, *cp
;
5916 struct iv_cand
*cand
;
5918 gcc_assert (ivs
->upto
>= group
->id
);
5922 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5924 cand
= data
->vcands
[i
];
5925 cp
= get_group_iv_cost (data
, group
, cand
);
5926 if (cheaper_cost_pair (cp
, best_cp
))
5930 if (best_cp
== NULL
)
5932 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
5934 cand
= data
->vcands
[i
];
5935 cp
= get_group_iv_cost (data
, group
, cand
);
5936 if (cheaper_cost_pair (cp
, best_cp
))
5941 iv_ca_set_cp (data
, ivs
, group
, best_cp
);
5944 /* Get cost for assignment IVS. */
5947 iv_ca_cost (struct iv_ca
*ivs
)
5949 /* This was a conditional expression but it triggered a bug in
5951 if (ivs
->bad_groups
)
5952 return infinite_cost
;
5957 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
5958 than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
5962 iv_ca_compare_deps (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5963 struct iv_group
*group
, struct cost_pair
*old_cp
,
5964 struct cost_pair
*new_cp
)
5966 gcc_assert (old_cp
&& new_cp
&& old_cp
!= new_cp
);
5967 unsigned old_n_invs
= ivs
->n_invs
;
5968 iv_ca_set_cp (data
, ivs
, group
, new_cp
);
5969 unsigned new_n_invs
= ivs
->n_invs
;
5970 iv_ca_set_cp (data
, ivs
, group
, old_cp
);
5972 return new_n_invs
> old_n_invs
? 1 : (new_n_invs
< old_n_invs
? -1 : 0);
5975 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
5978 static struct iv_ca_delta
*
5979 iv_ca_delta_add (struct iv_group
*group
, struct cost_pair
*old_cp
,
5980 struct cost_pair
*new_cp
, struct iv_ca_delta
*next
)
5982 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
5984 change
->group
= group
;
5985 change
->old_cp
= old_cp
;
5986 change
->new_cp
= new_cp
;
5987 change
->next
= next
;
5992 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5995 static struct iv_ca_delta
*
5996 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
5998 struct iv_ca_delta
*last
;
6006 for (last
= l1
; last
->next
; last
= last
->next
)
6013 /* Reverse the list of changes DELTA, forming the inverse to it. */
6015 static struct iv_ca_delta
*
6016 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
6018 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
6020 for (act
= delta
; act
; act
= next
)
6026 std::swap (act
->old_cp
, act
->new_cp
);
6032 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6033 reverted instead. */
6036 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6037 struct iv_ca_delta
*delta
, bool forward
)
6039 struct cost_pair
*from
, *to
;
6040 struct iv_ca_delta
*act
;
6043 delta
= iv_ca_delta_reverse (delta
);
6045 for (act
= delta
; act
; act
= act
->next
)
6049 gcc_assert (iv_ca_cand_for_group (ivs
, act
->group
) == from
);
6050 iv_ca_set_cp (data
, ivs
, act
->group
, to
);
6054 iv_ca_delta_reverse (delta
);
6057 /* Returns true if CAND is used in IVS. */
6060 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
6062 return ivs
->n_cand_uses
[cand
->id
] > 0;
6065 /* Returns number of induction variable candidates in the set IVS. */
6068 iv_ca_n_cands (struct iv_ca
*ivs
)
6070 return ivs
->n_cands
;
6073 /* Free the list of changes DELTA. */
6076 iv_ca_delta_free (struct iv_ca_delta
**delta
)
6078 struct iv_ca_delta
*act
, *next
;
6080 for (act
= *delta
; act
; act
= next
)
6089 /* Allocates new iv candidates assignment. */
6091 static struct iv_ca
*
6092 iv_ca_new (struct ivopts_data
*data
)
6094 struct iv_ca
*nw
= XNEW (struct iv_ca
);
6098 nw
->cand_for_group
= XCNEWVEC (struct cost_pair
*,
6099 data
->vgroups
.length ());
6100 nw
->n_cand_uses
= XCNEWVEC (unsigned, data
->vcands
.length ());
6101 nw
->cands
= BITMAP_ALLOC (NULL
);
6104 nw
->cand_use_cost
= no_cost
;
6106 nw
->n_inv_var_uses
= XCNEWVEC (unsigned, data
->max_inv_var_id
+ 1);
6107 nw
->n_inv_expr_uses
= XCNEWVEC (unsigned, data
->max_inv_expr_id
+ 1);
6113 /* Free memory occupied by the set IVS. */
6116 iv_ca_free (struct iv_ca
**ivs
)
6118 free ((*ivs
)->cand_for_group
);
6119 free ((*ivs
)->n_cand_uses
);
6120 BITMAP_FREE ((*ivs
)->cands
);
6121 free ((*ivs
)->n_inv_var_uses
);
6122 free ((*ivs
)->n_inv_expr_uses
);
6127 /* Dumps IVS to FILE. */
6130 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
6133 comp_cost cost
= iv_ca_cost (ivs
);
6135 fprintf (file
, " cost: %d (complexity %d)\n", cost
.cost
,
6137 fprintf (file
, " cand_cost: %d\n cand_group_cost: %d (complexity %d)\n",
6138 ivs
->cand_cost
, ivs
->cand_use_cost
.cost
,
6139 ivs
->cand_use_cost
.complexity
);
6140 bitmap_print (file
, ivs
->cands
, " candidates: ","\n");
6142 for (i
= 0; i
< ivs
->upto
; i
++)
6144 struct iv_group
*group
= data
->vgroups
[i
];
6145 struct cost_pair
*cp
= iv_ca_cand_for_group (ivs
, group
);
6147 fprintf (file
, " group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6148 group
->id
, cp
->cand
->id
, cp
->cost
.cost
,
6149 cp
->cost
.complexity
);
6151 fprintf (file
, " group:%d --> ??\n", group
->id
);
6154 const char *pref
= "";
6155 fprintf (file
, " invariant variables: ");
6156 for (i
= 1; i
<= data
->max_inv_var_id
; i
++)
6157 if (ivs
->n_inv_var_uses
[i
])
6159 fprintf (file
, "%s%d", pref
, i
);
6164 fprintf (file
, "\n invariant expressions: ");
6165 for (i
= 1; i
<= data
->max_inv_expr_id
; i
++)
6166 if (ivs
->n_inv_expr_uses
[i
])
6168 fprintf (file
, "%s%d", pref
, i
);
6172 fprintf (file
, "\n\n");
6175 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6176 new set, and store differences in DELTA. Number of induction variables
6177 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6178 the function will try to find a solution with mimimal iv candidates. */
6181 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6182 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
6183 unsigned *n_ivs
, bool min_ncand
)
6187 struct iv_group
*group
;
6188 struct cost_pair
*old_cp
, *new_cp
;
6191 for (i
= 0; i
< ivs
->upto
; i
++)
6193 group
= data
->vgroups
[i
];
6194 old_cp
= iv_ca_cand_for_group (ivs
, group
);
6197 && old_cp
->cand
== cand
)
6200 new_cp
= get_group_iv_cost (data
, group
, cand
);
6206 int cmp_invs
= iv_ca_compare_deps (data
, ivs
, group
, old_cp
, new_cp
);
6207 /* Skip if new_cp depends on more invariants. */
6211 int cmp_cost
= compare_cost_pair (new_cp
, old_cp
);
6212 /* Skip if new_cp is not cheaper. */
6213 if (cmp_cost
> 0 || (cmp_cost
== 0 && cmp_invs
== 0))
6217 *delta
= iv_ca_delta_add (group
, old_cp
, new_cp
, *delta
);
6220 iv_ca_delta_commit (data
, ivs
, *delta
, true);
6221 cost
= iv_ca_cost (ivs
);
6223 *n_ivs
= iv_ca_n_cands (ivs
);
6224 iv_ca_delta_commit (data
, ivs
, *delta
, false);
6229 /* Try narrowing set IVS by removing CAND. Return the cost of
6230 the new set and store the differences in DELTA. START is
6231 the candidate with which we start narrowing. */
6234 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6235 struct iv_cand
*cand
, struct iv_cand
*start
,
6236 struct iv_ca_delta
**delta
)
6239 struct iv_group
*group
;
6240 struct cost_pair
*old_cp
, *new_cp
, *cp
;
6242 struct iv_cand
*cnd
;
6243 comp_cost cost
, best_cost
, acost
;
6246 for (i
= 0; i
< data
->vgroups
.length (); i
++)
6248 group
= data
->vgroups
[i
];
6250 old_cp
= iv_ca_cand_for_group (ivs
, group
);
6251 if (old_cp
->cand
!= cand
)
6254 best_cost
= iv_ca_cost (ivs
);
6255 /* Start narrowing with START. */
6256 new_cp
= get_group_iv_cost (data
, group
, start
);
6258 if (data
->consider_all_candidates
)
6260 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
6262 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
6265 cnd
= data
->vcands
[ci
];
6267 cp
= get_group_iv_cost (data
, group
, cnd
);
6271 iv_ca_set_cp (data
, ivs
, group
, cp
);
6272 acost
= iv_ca_cost (ivs
);
6274 if (acost
< best_cost
)
6283 EXECUTE_IF_AND_IN_BITMAP (group
->related_cands
, ivs
->cands
, 0, ci
, bi
)
6285 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
6288 cnd
= data
->vcands
[ci
];
6290 cp
= get_group_iv_cost (data
, group
, cnd
);
6294 iv_ca_set_cp (data
, ivs
, group
, cp
);
6295 acost
= iv_ca_cost (ivs
);
6297 if (acost
< best_cost
)
6304 /* Restore to old cp for use. */
6305 iv_ca_set_cp (data
, ivs
, group
, old_cp
);
6309 iv_ca_delta_free (delta
);
6310 return infinite_cost
;
6313 *delta
= iv_ca_delta_add (group
, old_cp
, new_cp
, *delta
);
6316 iv_ca_delta_commit (data
, ivs
, *delta
, true);
6317 cost
= iv_ca_cost (ivs
);
6318 iv_ca_delta_commit (data
, ivs
, *delta
, false);
6323 /* Try optimizing the set of candidates IVS by removing candidates different
6324 from to EXCEPT_CAND from it. Return cost of the new set, and store
6325 differences in DELTA. */
6328 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6329 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
6332 struct iv_ca_delta
*act_delta
, *best_delta
;
6334 comp_cost best_cost
, acost
;
6335 struct iv_cand
*cand
;
6338 best_cost
= iv_ca_cost (ivs
);
6340 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
6342 cand
= data
->vcands
[i
];
6344 if (cand
== except_cand
)
6347 acost
= iv_ca_narrow (data
, ivs
, cand
, except_cand
, &act_delta
);
6349 if (acost
< best_cost
)
6352 iv_ca_delta_free (&best_delta
);
6353 best_delta
= act_delta
;
6356 iv_ca_delta_free (&act_delta
);
6365 /* Recurse to possibly remove other unnecessary ivs. */
6366 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6367 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
6368 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
6369 *delta
= iv_ca_delta_join (best_delta
, *delta
);
6373 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6374 cheaper local cost for GROUP than BEST_CP. Return pointer to
6375 the corresponding cost_pair, otherwise just return BEST_CP. */
6377 static struct cost_pair
*
6378 cheaper_cost_with_cand (struct ivopts_data
*data
, struct iv_group
*group
,
6379 unsigned int cand_idx
, struct iv_cand
*old_cand
,
6380 struct cost_pair
*best_cp
)
6382 struct iv_cand
*cand
;
6383 struct cost_pair
*cp
;
6385 gcc_assert (old_cand
!= NULL
&& best_cp
!= NULL
);
6386 if (cand_idx
== old_cand
->id
)
6389 cand
= data
->vcands
[cand_idx
];
6390 cp
= get_group_iv_cost (data
, group
, cand
);
6391 if (cp
!= NULL
&& cheaper_cost_pair (cp
, best_cp
))
6397 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6398 which are used by more than one iv uses. For each of those candidates,
6399 this function tries to represent iv uses under that candidate using
6400 other ones with lower local cost, then tries to prune the new set.
6401 If the new set has lower cost, It returns the new cost after recording
6402 candidate replacement in list DELTA. */
6405 iv_ca_replace (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6406 struct iv_ca_delta
**delta
)
6408 bitmap_iterator bi
, bj
;
6409 unsigned int i
, j
, k
;
6410 struct iv_cand
*cand
;
6411 comp_cost orig_cost
, acost
;
6412 struct iv_ca_delta
*act_delta
, *tmp_delta
;
6413 struct cost_pair
*old_cp
, *best_cp
= NULL
;
6416 orig_cost
= iv_ca_cost (ivs
);
6418 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
6420 if (ivs
->n_cand_uses
[i
] == 1
6421 || ivs
->n_cand_uses
[i
] > ALWAYS_PRUNE_CAND_SET_BOUND
)
6424 cand
= data
->vcands
[i
];
6427 /* Represent uses under current candidate using other ones with
6428 lower local cost. */
6429 for (j
= 0; j
< ivs
->upto
; j
++)
6431 struct iv_group
*group
= data
->vgroups
[j
];
6432 old_cp
= iv_ca_cand_for_group (ivs
, group
);
6434 if (old_cp
->cand
!= cand
)
6438 if (data
->consider_all_candidates
)
6439 for (k
= 0; k
< data
->vcands
.length (); k
++)
6440 best_cp
= cheaper_cost_with_cand (data
, group
, k
,
6441 old_cp
->cand
, best_cp
);
6443 EXECUTE_IF_SET_IN_BITMAP (group
->related_cands
, 0, k
, bj
)
6444 best_cp
= cheaper_cost_with_cand (data
, group
, k
,
6445 old_cp
->cand
, best_cp
);
6447 if (best_cp
== old_cp
)
6450 act_delta
= iv_ca_delta_add (group
, old_cp
, best_cp
, act_delta
);
6452 /* No need for further prune. */
6456 /* Prune the new candidate set. */
6457 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6458 acost
= iv_ca_prune (data
, ivs
, NULL
, &tmp_delta
);
6459 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6460 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
6462 if (acost
< orig_cost
)
6468 iv_ca_delta_free (&act_delta
);
6474 /* Tries to extend the sets IVS in the best possible way in order to
6475 express the GROUP. If ORIGINALP is true, prefer candidates from
6476 the original set of IVs, otherwise favor important candidates not
6477 based on any memory object. */
6480 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6481 struct iv_group
*group
, bool originalp
)
6483 comp_cost best_cost
, act_cost
;
6486 struct iv_cand
*cand
;
6487 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
6488 struct cost_pair
*cp
;
6490 iv_ca_add_group (data
, ivs
, group
);
6491 best_cost
= iv_ca_cost (ivs
);
6492 cp
= iv_ca_cand_for_group (ivs
, group
);
6495 best_delta
= iv_ca_delta_add (group
, NULL
, cp
, NULL
);
6496 iv_ca_set_no_cp (data
, ivs
, group
);
6499 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6500 first try important candidates not based on any memory object. Only if
6501 this fails, try the specific ones. Rationale -- in loops with many
6502 variables the best choice often is to use just one generic biv. If we
6503 added here many ivs specific to the uses, the optimization algorithm later
6504 would be likely to get stuck in a local minimum, thus causing us to create
6505 too many ivs. The approach from few ivs to more seems more likely to be
6506 successful -- starting from few ivs, replacing an expensive use by a
6507 specific iv should always be a win. */
6508 EXECUTE_IF_SET_IN_BITMAP (group
->related_cands
, 0, i
, bi
)
6510 cand
= data
->vcands
[i
];
6512 if (originalp
&& cand
->pos
!=IP_ORIGINAL
)
6515 if (!originalp
&& cand
->iv
->base_object
!= NULL_TREE
)
6518 if (iv_ca_cand_used_p (ivs
, cand
))
6521 cp
= get_group_iv_cost (data
, group
, cand
);
6525 iv_ca_set_cp (data
, ivs
, group
, cp
);
6526 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
,
6528 iv_ca_set_no_cp (data
, ivs
, group
);
6529 act_delta
= iv_ca_delta_add (group
, NULL
, cp
, act_delta
);
6531 if (act_cost
< best_cost
)
6533 best_cost
= act_cost
;
6535 iv_ca_delta_free (&best_delta
);
6536 best_delta
= act_delta
;
6539 iv_ca_delta_free (&act_delta
);
6542 if (best_cost
.infinite_cost_p ())
6544 for (i
= 0; i
< group
->n_map_members
; i
++)
6546 cp
= group
->cost_map
+ i
;
6551 /* Already tried this. */
6552 if (cand
->important
)
6554 if (originalp
&& cand
->pos
== IP_ORIGINAL
)
6556 if (!originalp
&& cand
->iv
->base_object
== NULL_TREE
)
6560 if (iv_ca_cand_used_p (ivs
, cand
))
6564 iv_ca_set_cp (data
, ivs
, group
, cp
);
6565 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
, true);
6566 iv_ca_set_no_cp (data
, ivs
, group
);
6567 act_delta
= iv_ca_delta_add (group
,
6568 iv_ca_cand_for_group (ivs
, group
),
6571 if (act_cost
< best_cost
)
6573 best_cost
= act_cost
;
6576 iv_ca_delta_free (&best_delta
);
6577 best_delta
= act_delta
;
6580 iv_ca_delta_free (&act_delta
);
6584 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6585 iv_ca_delta_free (&best_delta
);
6587 return !best_cost
.infinite_cost_p ();
6590 /* Finds an initial assignment of candidates to uses. */
6592 static struct iv_ca
*
6593 get_initial_solution (struct ivopts_data
*data
, bool originalp
)
6596 struct iv_ca
*ivs
= iv_ca_new (data
);
6598 for (i
= 0; i
< data
->vgroups
.length (); i
++)
6599 if (!try_add_cand_for (data
, ivs
, data
->vgroups
[i
], originalp
))
6608 /* Tries to improve set of induction variables IVS. TRY_REPLACE_P
6609 points to a bool variable, this function tries to break local
6610 optimal fixed-point by replacing candidates in IVS if it's true. */
6613 try_improve_iv_set (struct ivopts_data
*data
,
6614 struct iv_ca
*ivs
, bool *try_replace_p
)
6617 comp_cost acost
, best_cost
= iv_ca_cost (ivs
);
6618 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
6619 struct iv_cand
*cand
;
6621 /* Try extending the set of induction variables by one. */
6622 for (i
= 0; i
< data
->vcands
.length (); i
++)
6624 cand
= data
->vcands
[i
];
6626 if (iv_ca_cand_used_p (ivs
, cand
))
6629 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
, false);
6633 /* If we successfully added the candidate and the set is small enough,
6634 try optimizing it by removing other candidates. */
6635 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
6637 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6638 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
6639 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6640 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
6643 if (acost
< best_cost
)
6646 iv_ca_delta_free (&best_delta
);
6647 best_delta
= act_delta
;
6650 iv_ca_delta_free (&act_delta
);
6655 /* Try removing the candidates from the set instead. */
6656 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
6658 if (!best_delta
&& *try_replace_p
)
6660 *try_replace_p
= false;
6661 /* So far candidate selecting algorithm tends to choose fewer IVs
6662 so that it can handle cases in which loops have many variables
6663 but the best choice is often to use only one general biv. One
6664 weakness is it can't handle opposite cases, in which different
6665 candidates should be chosen with respect to each use. To solve
6666 the problem, we replace candidates in a manner described by the
6667 comments of iv_ca_replace, thus give general algorithm a chance
6668 to break local optimal fixed-point in these cases. */
6669 best_cost
= iv_ca_replace (data
, ivs
, &best_delta
);
6676 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6677 gcc_assert (best_cost
== iv_ca_cost (ivs
));
6678 iv_ca_delta_free (&best_delta
);
6682 /* Attempts to find the optimal set of induction variables. We do simple
6683 greedy heuristic -- we try to replace at most one candidate in the selected
6684 solution and remove the unused ivs while this improves the cost. */
6686 static struct iv_ca
*
6687 find_optimal_iv_set_1 (struct ivopts_data
*data
, bool originalp
)
6690 bool try_replace_p
= true;
6692 /* Get the initial solution. */
6693 set
= get_initial_solution (data
, originalp
);
6696 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6697 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
6701 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6703 fprintf (dump_file
, "Initial set of candidates:\n");
6704 iv_ca_dump (data
, dump_file
, set
);
6707 while (try_improve_iv_set (data
, set
, &try_replace_p
))
6709 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6711 fprintf (dump_file
, "Improved to:\n");
6712 iv_ca_dump (data
, dump_file
, set
);
6719 static struct iv_ca
*
6720 find_optimal_iv_set (struct ivopts_data
*data
)
6723 comp_cost cost
, origcost
;
6724 struct iv_ca
*set
, *origset
;
6726 /* Determine the cost based on a strategy that starts with original IVs,
6727 and try again using a strategy that prefers candidates not based
6729 origset
= find_optimal_iv_set_1 (data
, true);
6730 set
= find_optimal_iv_set_1 (data
, false);
6732 if (!origset
&& !set
)
6735 origcost
= origset
? iv_ca_cost (origset
) : infinite_cost
;
6736 cost
= set
? iv_ca_cost (set
) : infinite_cost
;
6738 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6740 fprintf (dump_file
, "Original cost %d (complexity %d)\n\n",
6741 origcost
.cost
, origcost
.complexity
);
6742 fprintf (dump_file
, "Final cost %d (complexity %d)\n\n",
6743 cost
.cost
, cost
.complexity
);
6746 /* Choose the one with the best cost. */
6747 if (origcost
<= cost
)
6754 iv_ca_free (&origset
);
6756 for (i
= 0; i
< data
->vgroups
.length (); i
++)
6758 struct iv_group
*group
= data
->vgroups
[i
];
6759 group
->selected
= iv_ca_cand_for_group (set
, group
)->cand
;
6765 /* Creates a new induction variable corresponding to CAND. */
6768 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
6770 gimple_stmt_iterator incr_pos
;
6773 struct iv_group
*group
;
6776 gcc_assert (cand
->iv
!= NULL
);
6781 incr_pos
= gsi_last_bb (ip_normal_pos (data
->current_loop
));
6785 incr_pos
= gsi_last_bb (ip_end_pos (data
->current_loop
));
6793 incr_pos
= gsi_for_stmt (cand
->incremented_at
);
6797 /* Mark that the iv is preserved. */
6798 name_info (data
, cand
->var_before
)->preserve_biv
= true;
6799 name_info (data
, cand
->var_after
)->preserve_biv
= true;
6801 /* Rewrite the increment so that it uses var_before directly. */
6802 use
= find_interesting_uses_op (data
, cand
->var_after
);
6803 group
= data
->vgroups
[use
->group_id
];
6804 group
->selected
= cand
;
6808 gimple_add_tmp_var (cand
->var_before
);
6810 base
= unshare_expr (cand
->iv
->base
);
6812 create_iv (base
, unshare_expr (cand
->iv
->step
),
6813 cand
->var_before
, data
->current_loop
,
6814 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
6817 /* Creates new induction variables described in SET. */
6820 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
6823 struct iv_cand
*cand
;
6826 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6828 cand
= data
->vcands
[i
];
6829 create_new_iv (data
, cand
);
6832 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6834 fprintf (dump_file
, "Selected IV set for loop %d",
6835 data
->current_loop
->num
);
6836 if (data
->loop_loc
!= UNKNOWN_LOCATION
)
6837 fprintf (dump_file
, " at %s:%d", LOCATION_FILE (data
->loop_loc
),
6838 LOCATION_LINE (data
->loop_loc
));
6839 fprintf (dump_file
, ", " HOST_WIDE_INT_PRINT_DEC
" avg niters",
6840 avg_loop_niter (data
->current_loop
));
6841 fprintf (dump_file
, ", %lu IVs:\n", bitmap_count_bits (set
->cands
));
6842 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6844 cand
= data
->vcands
[i
];
6845 dump_cand (dump_file
, cand
);
6847 fprintf (dump_file
, "\n");
6851 /* Rewrites USE (definition of iv used in a nonlinear expression)
6852 using candidate CAND. */
6855 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
6856 struct iv_use
*use
, struct iv_cand
*cand
)
6859 gimple_stmt_iterator bsi
;
6860 tree comp
, type
= get_use_type (use
), tgt
;
6862 /* An important special case -- if we are asked to express value of
6863 the original iv by itself, just exit; there is no need to
6864 introduce a new computation (that might also need casting the
6865 variable to unsigned and back). */
6866 if (cand
->pos
== IP_ORIGINAL
6867 && cand
->incremented_at
== use
->stmt
)
6869 tree op
= NULL_TREE
;
6870 enum tree_code stmt_code
;
6872 gcc_assert (is_gimple_assign (use
->stmt
));
6873 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
6875 /* Check whether we may leave the computation unchanged.
6876 This is the case only if it does not rely on other
6877 computations in the loop -- otherwise, the computation
6878 we rely upon may be removed in remove_unused_ivs,
6879 thus leading to ICE. */
6880 stmt_code
= gimple_assign_rhs_code (use
->stmt
);
6881 if (stmt_code
== PLUS_EXPR
6882 || stmt_code
== MINUS_EXPR
6883 || stmt_code
== POINTER_PLUS_EXPR
)
6885 if (gimple_assign_rhs1 (use
->stmt
) == cand
->var_before
)
6886 op
= gimple_assign_rhs2 (use
->stmt
);
6887 else if (gimple_assign_rhs2 (use
->stmt
) == cand
->var_before
)
6888 op
= gimple_assign_rhs1 (use
->stmt
);
6891 if (op
!= NULL_TREE
)
6893 if (expr_invariant_in_loop_p (data
->current_loop
, op
))
6895 if (TREE_CODE (op
) == SSA_NAME
)
6897 struct iv
*iv
= get_iv (data
, op
);
6898 if (iv
!= NULL
&& integer_zerop (iv
->step
))
6904 switch (gimple_code (use
->stmt
))
6907 tgt
= PHI_RESULT (use
->stmt
);
6909 /* If we should keep the biv, do not replace it. */
6910 if (name_info (data
, tgt
)->preserve_biv
)
6913 bsi
= gsi_after_labels (gimple_bb (use
->stmt
));
6917 tgt
= gimple_assign_lhs (use
->stmt
);
6918 bsi
= gsi_for_stmt (use
->stmt
);
6925 aff_tree aff_inv
, aff_var
;
6926 if (!get_computation_aff_1 (data
->current_loop
, use
->stmt
,
6927 use
, cand
, &aff_inv
, &aff_var
))
6930 unshare_aff_combination (&aff_inv
);
6931 unshare_aff_combination (&aff_var
);
6932 /* Prefer CSE opportunity than loop invariant by adding offset at last
6933 so that iv_uses have different offsets can be CSEed. */
6934 poly_widest_int offset
= aff_inv
.offset
;
6937 gimple_seq stmt_list
= NULL
, seq
= NULL
;
6938 tree comp_op1
= aff_combination_to_tree (&aff_inv
);
6939 tree comp_op2
= aff_combination_to_tree (&aff_var
);
6940 gcc_assert (comp_op1
&& comp_op2
);
6942 comp_op1
= force_gimple_operand (comp_op1
, &seq
, true, NULL
);
6943 gimple_seq_add_seq (&stmt_list
, seq
);
6944 comp_op2
= force_gimple_operand (comp_op2
, &seq
, true, NULL
);
6945 gimple_seq_add_seq (&stmt_list
, seq
);
6947 if (POINTER_TYPE_P (TREE_TYPE (comp_op2
)))
6948 std::swap (comp_op1
, comp_op2
);
6950 if (POINTER_TYPE_P (TREE_TYPE (comp_op1
)))
6952 comp
= fold_build_pointer_plus (comp_op1
,
6953 fold_convert (sizetype
, comp_op2
));
6954 comp
= fold_build_pointer_plus (comp
,
6955 wide_int_to_tree (sizetype
, offset
));
6959 comp
= fold_build2 (PLUS_EXPR
, TREE_TYPE (comp_op1
), comp_op1
,
6960 fold_convert (TREE_TYPE (comp_op1
), comp_op2
));
6961 comp
= fold_build2 (PLUS_EXPR
, TREE_TYPE (comp_op1
), comp
,
6962 wide_int_to_tree (TREE_TYPE (comp_op1
), offset
));
6965 comp
= fold_convert (type
, comp
);
6966 if (!valid_gimple_rhs_p (comp
)
6967 || (gimple_code (use
->stmt
) != GIMPLE_PHI
6968 /* We can't allow re-allocating the stmt as it might be pointed
6970 && (get_gimple_rhs_num_ops (TREE_CODE (comp
))
6971 >= gimple_num_ops (gsi_stmt (bsi
)))))
6973 comp
= force_gimple_operand (comp
, &seq
, true, NULL
);
6974 gimple_seq_add_seq (&stmt_list
, seq
);
6975 if (POINTER_TYPE_P (TREE_TYPE (tgt
)))
6977 duplicate_ssa_name_ptr_info (comp
, SSA_NAME_PTR_INFO (tgt
));
6978 /* As this isn't a plain copy we have to reset alignment
6980 if (SSA_NAME_PTR_INFO (comp
))
6981 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp
));
6985 gsi_insert_seq_before (&bsi
, stmt_list
, GSI_SAME_STMT
);
6986 if (gimple_code (use
->stmt
) == GIMPLE_PHI
)
6988 ass
= gimple_build_assign (tgt
, comp
);
6989 gsi_insert_before (&bsi
, ass
, GSI_SAME_STMT
);
6991 bsi
= gsi_for_stmt (use
->stmt
);
6992 remove_phi_node (&bsi
, false);
6996 gimple_assign_set_rhs_from_tree (&bsi
, comp
);
6997 use
->stmt
= gsi_stmt (bsi
);
7001 /* Performs a peephole optimization to reorder the iv update statement with
7002 a mem ref to enable instruction combining in later phases. The mem ref uses
7003 the iv value before the update, so the reordering transformation requires
7004 adjustment of the offset. CAND is the selected IV_CAND.
7008 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7016 directly propagating t over to (1) will introduce overlapping live range
7017 thus increase register pressure. This peephole transform it into:
7021 t = MEM_REF (base, iv2, 8, 8);
7028 adjust_iv_update_pos (struct iv_cand
*cand
, struct iv_use
*use
)
7031 gimple
*iv_update
, *stmt
;
7033 gimple_stmt_iterator gsi
, gsi_iv
;
7035 if (cand
->pos
!= IP_NORMAL
)
7038 var_after
= cand
->var_after
;
7039 iv_update
= SSA_NAME_DEF_STMT (var_after
);
7041 bb
= gimple_bb (iv_update
);
7042 gsi
= gsi_last_nondebug_bb (bb
);
7043 stmt
= gsi_stmt (gsi
);
7045 /* Only handle conditional statement for now. */
7046 if (gimple_code (stmt
) != GIMPLE_COND
)
7049 gsi_prev_nondebug (&gsi
);
7050 stmt
= gsi_stmt (gsi
);
7051 if (stmt
!= iv_update
)
7054 gsi_prev_nondebug (&gsi
);
7055 if (gsi_end_p (gsi
))
7058 stmt
= gsi_stmt (gsi
);
7059 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
7062 if (stmt
!= use
->stmt
)
7065 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
7068 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7070 fprintf (dump_file
, "Reordering \n");
7071 print_gimple_stmt (dump_file
, iv_update
, 0);
7072 print_gimple_stmt (dump_file
, use
->stmt
, 0);
7073 fprintf (dump_file
, "\n");
7076 gsi
= gsi_for_stmt (use
->stmt
);
7077 gsi_iv
= gsi_for_stmt (iv_update
);
7078 gsi_move_before (&gsi_iv
, &gsi
);
7080 cand
->pos
= IP_BEFORE_USE
;
7081 cand
->incremented_at
= use
->stmt
;
7084 /* Return the alias pointer type that should be used for a MEM_REF
7085 associated with USE, which has type USE_PTR_ADDRESS. */
7088 get_alias_ptr_type_for_ptr_address (iv_use
*use
)
7090 gcall
*call
= as_a
<gcall
*> (use
->stmt
);
7091 switch (gimple_call_internal_fn (call
))
7094 case IFN_MASK_STORE
:
7095 /* The second argument contains the correct alias type. */
7096 gcc_assert (use
->op_p
= gimple_call_arg_ptr (call
, 0));
7097 return TREE_TYPE (gimple_call_arg (call
, 1));
7105 /* Rewrites USE (address that is an iv) using candidate CAND. */
7108 rewrite_use_address (struct ivopts_data
*data
,
7109 struct iv_use
*use
, struct iv_cand
*cand
)
7114 adjust_iv_update_pos (cand
, use
);
7115 ok
= get_computation_aff (data
->current_loop
, use
->stmt
, use
, cand
, &aff
);
7117 unshare_aff_combination (&aff
);
7119 /* To avoid undefined overflow problems, all IV candidates use unsigned
7120 integer types. The drawback is that this makes it impossible for
7121 create_mem_ref to distinguish an IV that is based on a memory object
7122 from one that represents simply an offset.
7124 To work around this problem, we pass a hint to create_mem_ref that
7125 indicates which variable (if any) in aff is an IV based on a memory
7126 object. Note that we only consider the candidate. If this is not
7127 based on an object, the base of the reference is in some subexpression
7128 of the use -- but these will use pointer types, so they are recognized
7129 by the create_mem_ref heuristics anyway. */
7130 tree iv
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
7131 tree base_hint
= (cand
->iv
->base_object
) ? iv
: NULL_TREE
;
7132 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
7133 tree type
= use
->mem_type
;
7134 tree alias_ptr_type
;
7135 if (use
->type
== USE_PTR_ADDRESS
)
7136 alias_ptr_type
= get_alias_ptr_type_for_ptr_address (use
);
7139 gcc_assert (type
== TREE_TYPE (*use
->op_p
));
7140 unsigned int align
= get_object_alignment (*use
->op_p
);
7141 if (align
!= TYPE_ALIGN (type
))
7142 type
= build_aligned_type (type
, align
);
7143 alias_ptr_type
= reference_alias_ptr_type (*use
->op_p
);
7145 tree ref
= create_mem_ref (&bsi
, type
, &aff
, alias_ptr_type
,
7146 iv
, base_hint
, data
->speed
);
7148 if (use
->type
== USE_PTR_ADDRESS
)
7150 ref
= fold_build1 (ADDR_EXPR
, build_pointer_type (use
->mem_type
), ref
);
7151 ref
= fold_convert (get_use_type (use
), ref
);
7152 ref
= force_gimple_operand_gsi (&bsi
, ref
, true, NULL_TREE
,
7153 true, GSI_SAME_STMT
);
7156 copy_ref_info (ref
, *use
->op_p
);
7161 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7165 rewrite_use_compare (struct ivopts_data
*data
,
7166 struct iv_use
*use
, struct iv_cand
*cand
)
7168 tree comp
, op
, bound
;
7169 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
7170 enum tree_code compare
;
7171 struct iv_group
*group
= data
->vgroups
[use
->group_id
];
7172 struct cost_pair
*cp
= get_group_iv_cost (data
, group
, cand
);
7177 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
7178 tree var_type
= TREE_TYPE (var
);
7181 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7183 fprintf (dump_file
, "Replacing exit test: ");
7184 print_gimple_stmt (dump_file
, use
->stmt
, 0, TDF_SLIM
);
7187 bound
= unshare_expr (fold_convert (var_type
, bound
));
7188 op
= force_gimple_operand (bound
, &stmts
, true, NULL_TREE
);
7190 gsi_insert_seq_on_edge_immediate (
7191 loop_preheader_edge (data
->current_loop
),
7194 gcond
*cond_stmt
= as_a
<gcond
*> (use
->stmt
);
7195 gimple_cond_set_lhs (cond_stmt
, var
);
7196 gimple_cond_set_code (cond_stmt
, compare
);
7197 gimple_cond_set_rhs (cond_stmt
, op
);
7201 /* The induction variable elimination failed; just express the original
7203 comp
= get_computation_at (data
->current_loop
, use
->stmt
, use
, cand
);
7204 gcc_assert (comp
!= NULL_TREE
);
7205 gcc_assert (use
->op_p
!= NULL
);
7206 *use
->op_p
= force_gimple_operand_gsi (&bsi
, comp
, true,
7207 SSA_NAME_VAR (*use
->op_p
),
7208 true, GSI_SAME_STMT
);
7211 /* Rewrite the groups using the selected induction variables. */
7214 rewrite_groups (struct ivopts_data
*data
)
7218 for (i
= 0; i
< data
->vgroups
.length (); i
++)
7220 struct iv_group
*group
= data
->vgroups
[i
];
7221 struct iv_cand
*cand
= group
->selected
;
7225 if (group
->type
== USE_NONLINEAR_EXPR
)
7227 for (j
= 0; j
< group
->vuses
.length (); j
++)
7229 rewrite_use_nonlinear_expr (data
, group
->vuses
[j
], cand
);
7230 update_stmt (group
->vuses
[j
]->stmt
);
7233 else if (address_p (group
->type
))
7235 for (j
= 0; j
< group
->vuses
.length (); j
++)
7237 rewrite_use_address (data
, group
->vuses
[j
], cand
);
7238 update_stmt (group
->vuses
[j
]->stmt
);
7243 gcc_assert (group
->type
== USE_COMPARE
);
7245 for (j
= 0; j
< group
->vuses
.length (); j
++)
7247 rewrite_use_compare (data
, group
->vuses
[j
], cand
);
7248 update_stmt (group
->vuses
[j
]->stmt
);
7254 /* Removes the ivs that are not used after rewriting. */
7257 remove_unused_ivs (struct ivopts_data
*data
)
7261 bitmap toremove
= BITMAP_ALLOC (NULL
);
7263 /* Figure out an order in which to release SSA DEFs so that we don't
7264 release something that we'd have to propagate into a debug stmt
7266 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
7268 struct version_info
*info
;
7270 info
= ver_info (data
, j
);
7272 && !integer_zerop (info
->iv
->step
)
7274 && !info
->iv
->nonlin_use
7275 && !info
->preserve_biv
)
7277 bitmap_set_bit (toremove
, SSA_NAME_VERSION (info
->iv
->ssa_name
));
7279 tree def
= info
->iv
->ssa_name
;
7281 if (MAY_HAVE_DEBUG_BIND_STMTS
&& SSA_NAME_DEF_STMT (def
))
7283 imm_use_iterator imm_iter
;
7284 use_operand_p use_p
;
7288 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
7290 if (!gimple_debug_bind_p (stmt
))
7293 /* We just want to determine whether to do nothing
7294 (count == 0), to substitute the computed
7295 expression into a single use of the SSA DEF by
7296 itself (count == 1), or to use a debug temp
7297 because the SSA DEF is used multiple times or as
7298 part of a larger expression (count > 1). */
7300 if (gimple_debug_bind_get_value (stmt
) != def
)
7304 BREAK_FROM_IMM_USE_STMT (imm_iter
);
7310 struct iv_use dummy_use
;
7311 struct iv_cand
*best_cand
= NULL
, *cand
;
7312 unsigned i
, best_pref
= 0, cand_pref
;
7314 memset (&dummy_use
, 0, sizeof (dummy_use
));
7315 dummy_use
.iv
= info
->iv
;
7316 for (i
= 0; i
< data
->vgroups
.length () && i
< 64; i
++)
7318 cand
= data
->vgroups
[i
]->selected
;
7319 if (cand
== best_cand
)
7321 cand_pref
= operand_equal_p (cand
->iv
->step
,
7325 += TYPE_MODE (TREE_TYPE (cand
->iv
->base
))
7326 == TYPE_MODE (TREE_TYPE (info
->iv
->base
))
7329 += TREE_CODE (cand
->iv
->base
) == INTEGER_CST
7331 if (best_cand
== NULL
|| best_pref
< cand_pref
)
7334 best_pref
= cand_pref
;
7341 tree comp
= get_computation_at (data
->current_loop
,
7342 SSA_NAME_DEF_STMT (def
),
7343 &dummy_use
, best_cand
);
7349 tree vexpr
= make_node (DEBUG_EXPR_DECL
);
7350 DECL_ARTIFICIAL (vexpr
) = 1;
7351 TREE_TYPE (vexpr
) = TREE_TYPE (comp
);
7352 if (SSA_NAME_VAR (def
))
7353 SET_DECL_MODE (vexpr
, DECL_MODE (SSA_NAME_VAR (def
)));
7355 SET_DECL_MODE (vexpr
, TYPE_MODE (TREE_TYPE (vexpr
)));
7357 = gimple_build_debug_bind (vexpr
, comp
, NULL
);
7358 gimple_stmt_iterator gsi
;
7360 if (gimple_code (SSA_NAME_DEF_STMT (def
)) == GIMPLE_PHI
)
7361 gsi
= gsi_after_labels (gimple_bb
7362 (SSA_NAME_DEF_STMT (def
)));
7364 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (def
));
7366 gsi_insert_before (&gsi
, def_temp
, GSI_SAME_STMT
);
7370 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
7372 if (!gimple_debug_bind_p (stmt
))
7375 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
7376 SET_USE (use_p
, comp
);
7384 release_defs_bitset (toremove
);
7386 BITMAP_FREE (toremove
);
7389 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7390 for hash_map::traverse. */
7393 free_tree_niter_desc (edge
const &, tree_niter_desc
*const &value
, void *)
7399 /* Frees data allocated by the optimization of a single loop. */
7402 free_loop_data (struct ivopts_data
*data
)
7410 data
->niters
->traverse
<void *, free_tree_niter_desc
> (NULL
);
7411 delete data
->niters
;
7412 data
->niters
= NULL
;
7415 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
7417 struct version_info
*info
;
7419 info
= ver_info (data
, i
);
7421 info
->has_nonlin_use
= false;
7422 info
->preserve_biv
= false;
7425 bitmap_clear (data
->relevant
);
7426 bitmap_clear (data
->important_candidates
);
7428 for (i
= 0; i
< data
->vgroups
.length (); i
++)
7430 struct iv_group
*group
= data
->vgroups
[i
];
7432 for (j
= 0; j
< group
->vuses
.length (); j
++)
7433 free (group
->vuses
[j
]);
7434 group
->vuses
.release ();
7436 BITMAP_FREE (group
->related_cands
);
7437 for (j
= 0; j
< group
->n_map_members
; j
++)
7439 if (group
->cost_map
[j
].inv_vars
)
7440 BITMAP_FREE (group
->cost_map
[j
].inv_vars
);
7441 if (group
->cost_map
[j
].inv_exprs
)
7442 BITMAP_FREE (group
->cost_map
[j
].inv_exprs
);
7445 free (group
->cost_map
);
7448 data
->vgroups
.truncate (0);
7450 for (i
= 0; i
< data
->vcands
.length (); i
++)
7452 struct iv_cand
*cand
= data
->vcands
[i
];
7455 BITMAP_FREE (cand
->inv_vars
);
7456 if (cand
->inv_exprs
)
7457 BITMAP_FREE (cand
->inv_exprs
);
7460 data
->vcands
.truncate (0);
7462 if (data
->version_info_size
< num_ssa_names
)
7464 data
->version_info_size
= 2 * num_ssa_names
;
7465 free (data
->version_info
);
7466 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
7469 data
->max_inv_var_id
= 0;
7470 data
->max_inv_expr_id
= 0;
7472 FOR_EACH_VEC_ELT (decl_rtl_to_reset
, i
, obj
)
7473 SET_DECL_RTL (obj
, NULL_RTX
);
7475 decl_rtl_to_reset
.truncate (0);
7477 data
->inv_expr_tab
->empty ();
7479 data
->iv_common_cand_tab
->empty ();
7480 data
->iv_common_cands
.truncate (0);
7483 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
7487 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
7489 free_loop_data (data
);
7490 free (data
->version_info
);
7491 BITMAP_FREE (data
->relevant
);
7492 BITMAP_FREE (data
->important_candidates
);
7494 decl_rtl_to_reset
.release ();
7495 data
->vgroups
.release ();
7496 data
->vcands
.release ();
7497 delete data
->inv_expr_tab
;
7498 data
->inv_expr_tab
= NULL
;
7499 free_affine_expand_cache (&data
->name_expansion_cache
);
7500 delete data
->iv_common_cand_tab
;
7501 data
->iv_common_cand_tab
= NULL
;
7502 data
->iv_common_cands
.release ();
7503 obstack_free (&data
->iv_obstack
, NULL
);
7506 /* Returns true if the loop body BODY includes any function calls. */
7509 loop_body_includes_call (basic_block
*body
, unsigned num_nodes
)
7511 gimple_stmt_iterator gsi
;
7514 for (i
= 0; i
< num_nodes
; i
++)
7515 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
7517 gimple
*stmt
= gsi_stmt (gsi
);
7518 if (is_gimple_call (stmt
)
7519 && !gimple_call_internal_p (stmt
)
7520 && !is_inexpensive_builtin (gimple_call_fndecl (stmt
)))
7526 /* Optimizes the LOOP. Returns true if anything changed. */
7529 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
7531 bool changed
= false;
7532 struct iv_ca
*iv_ca
;
7533 edge exit
= single_dom_exit (loop
);
7536 gcc_assert (!data
->niters
);
7537 data
->current_loop
= loop
;
7538 data
->loop_loc
= find_loop_location (loop
).get_location_t ();
7539 data
->speed
= optimize_loop_for_speed_p (loop
);
7541 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7543 fprintf (dump_file
, "Processing loop %d", loop
->num
);
7544 if (data
->loop_loc
!= UNKNOWN_LOCATION
)
7545 fprintf (dump_file
, " at %s:%d", LOCATION_FILE (data
->loop_loc
),
7546 LOCATION_LINE (data
->loop_loc
));
7547 fprintf (dump_file
, "\n");
7551 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
7552 exit
->src
->index
, exit
->dest
->index
);
7553 print_gimple_stmt (dump_file
, last_stmt (exit
->src
), 0, TDF_SLIM
);
7554 fprintf (dump_file
, "\n");
7557 fprintf (dump_file
, "\n");
7560 body
= get_loop_body (loop
);
7561 data
->body_includes_call
= loop_body_includes_call (body
, loop
->num_nodes
);
7562 renumber_gimple_stmt_uids_in_blocks (body
, loop
->num_nodes
);
7565 data
->loop_single_exit_p
= exit
!= NULL
&& loop_only_exit_p (loop
, exit
);
7567 /* For each ssa name determines whether it behaves as an induction variable
7569 if (!find_induction_variables (data
))
7572 /* Finds interesting uses (item 1). */
7573 find_interesting_uses (data
);
7574 if (data
->vgroups
.length () > MAX_CONSIDERED_GROUPS
)
7577 /* Finds candidates for the induction variables (item 2). */
7578 find_iv_candidates (data
);
7580 /* Calculates the costs (item 3, part 1). */
7581 determine_iv_costs (data
);
7582 determine_group_iv_costs (data
);
7583 determine_set_costs (data
);
7585 /* Find the optimal set of induction variables (item 3, part 2). */
7586 iv_ca
= find_optimal_iv_set (data
);
7591 /* Create the new induction variables (item 4, part 1). */
7592 create_new_ivs (data
, iv_ca
);
7593 iv_ca_free (&iv_ca
);
7595 /* Rewrite the uses (item 4, part 2). */
7596 rewrite_groups (data
);
7598 /* Remove the ivs that are unused after rewriting. */
7599 remove_unused_ivs (data
);
7601 /* We have changed the structure of induction variables; it might happen
7602 that definitions in the scev database refer to some of them that were
7607 free_loop_data (data
);
7612 /* Main entry point. Optimizes induction variables in loops. */
7615 tree_ssa_iv_optimize (void)
7618 struct ivopts_data data
;
7620 tree_ssa_iv_optimize_init (&data
);
7622 /* Optimize the loops starting with the innermost ones. */
7623 FOR_EACH_LOOP (loop
, LI_FROM_INNERMOST
)
7625 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7626 flow_loop_dump (loop
, dump_file
, NULL
, 1);
7628 tree_ssa_iv_optimize_loop (&data
, loop
);
7631 tree_ssa_iv_optimize_finalize (&data
);
7634 #include "gt-tree-ssa-loop-ivopts.h"