/* Induction variable optimizations.
   Copyright (C) 2003-2019 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It proceeds in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

      Note that the interesting uses are categorized and handled in groups.
      Generally, address type uses are grouped together if their iv bases
      differ only by a constant offset.

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 groups/uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The group/use costs.  Each of the interesting groups/uses chooses
	 the best induction variable in the set and adds its cost to the sum.
	 The cost reflects the time spent on modifying the induction variables
	 value to be usable for the given purpose (adding base and offset for
	 arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
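/* As a simplified illustration (an example added for exposition, not taken
   from any particular testcase), a loop such as

     for (i = 0; i < n; i++)
       sum += a[i];

   may end up using the address computation as the sole induction variable:

     for (p = &a[0]; p != &a[n]; p++)
       sum += *p;

   Here the candidate "p" serves both the memory access and the rewritten
   exit test, and the original counter "i" is eliminated.  */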
#include "coretypes.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "tree-scalar-evolution.h"
#include "tree-affine.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"
/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */

/* The infinite cost.  */
#define INFTY 1000000000
/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    {
      niter = likely_max_stmt_executions_int (loop);

      if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
	return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
    }

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  struct iv_use *nonlin_use;	/* The nonlinear use it appears in, if any.  */
  bool biv_p;		/* Is it a biv?  */
  bool no_overflow;	/* True if the iv doesn't overflow.  */
  bool have_address_use;/* For biv, indicate if it's used in any address
			   type use.  */
};
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_REF_ADDRESS,	/* Use is an address for an explicit memory
			   reference.  */
  USE_PTR_ADDRESS,	/* Use is a pointer argument to a function in
			   cases where the expansion of the function
			   will turn the argument into a normal address.  */
  USE_COMPARE		/* Use is a compare.  */
};
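/* For instance (an illustrative example), in

     for (i = 0; i < n; i++)
       sum += a[i] + i * i;

   the access "a[i]" gives rise to an address type use, the exit test
   "i < n" to a compare type use and "i * i" to a nonlinear use of the
   induction variable "i".  */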
/* Cost of a computation.  */
struct comp_cost
{
  comp_cost (): cost (0), complexity (0), scratch (0)
  {}

  comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
    : cost (cost), complexity (complexity), scratch (scratch)
  {}

  /* Returns true if COST is infinite.  */
  bool infinite_cost_p ();

  /* Adds costs COST1 and COST2.  */
  friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);

  /* Adds COST to the comp_cost.  */
  comp_cost operator+= (comp_cost cost);

  /* Adds constant C to this comp_cost.  */
  comp_cost operator+= (HOST_WIDE_INT c);

  /* Subtracts constant C from this comp_cost.  */
  comp_cost operator-= (HOST_WIDE_INT c);

  /* Divides the comp_cost by constant C.  */
  comp_cost operator/= (HOST_WIDE_INT c);

  /* Multiplies the comp_cost by constant C.  */
  comp_cost operator*= (HOST_WIDE_INT c);

  /* Subtracts costs COST1 and COST2.  */
  friend comp_cost operator- (comp_cost cost1, comp_cost cost2);

  /* Subtracts COST from this comp_cost.  */
  comp_cost operator-= (comp_cost cost);

  /* Returns true if COST1 is smaller than COST2.  */
  friend bool operator< (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 and COST2 are equal.  */
  friend bool operator== (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 is smaller than or equal to COST2.  */
  friend bool operator<= (comp_cost cost1, comp_cost cost2);

  int64_t cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
  int64_t scratch;	/* Scratch used during cost computation.  */
};

static const comp_cost no_cost;
static const comp_cost infinite_cost (INFTY, 0, INFTY);
bool
comp_cost::infinite_cost_p ()
{
  return cost == INFTY;
}

comp_cost
operator+ (comp_cost cost1, comp_cost cost2)
{
  if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
    return infinite_cost;

  gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

comp_cost
operator- (comp_cost cost1, comp_cost cost2)
{
  if (cost1.infinite_cost_p ())
    return infinite_cost;

  gcc_assert (!cost2.infinite_cost_p ());
  gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);

  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

comp_cost
comp_cost::operator+= (comp_cost cost)
{
  *this = *this + cost;
  return *this;
}

comp_cost
comp_cost::operator+= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  gcc_assert (this->cost + c < infinite_cost.cost);
  this->cost += c;

  return *this;
}

comp_cost
comp_cost::operator-= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  gcc_assert (this->cost - c < infinite_cost.cost);
  this->cost -= c;

  return *this;
}

comp_cost
comp_cost::operator/= (HOST_WIDE_INT c)
{
  gcc_assert (c != 0);
  if (infinite_cost_p ())
    return *this;

  this->cost /= c;

  return *this;
}

comp_cost
comp_cost::operator*= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  gcc_assert (this->cost * c < infinite_cost.cost);
  this->cost *= c;

  return *this;
}

comp_cost
comp_cost::operator-= (comp_cost cost)
{
  *this = *this - cost;
  return *this;
}

bool
operator< (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity < cost2.complexity;

  return cost1.cost < cost2.cost;
}

bool
operator== (comp_cost cost1, comp_cost cost2)
{
  return cost1.cost == cost2.cost
	 && cost1.complexity == cost2.complexity;
}

bool
operator<= (comp_cost cost1, comp_cost cost2)
{
  return cost1 < cost2 || cost1 == cost2;
}
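/* For example (purely illustrative), comp_cost (4, 1) < comp_cost (4, 2)
   because the runtime costs are equal and the complexity breaks the tie,
   while comp_cost (3, 9) < comp_cost (4, 0) because the runtime cost is
   compared first.  */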
struct iv_inv_expr_ent;

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  bitmap inv_vars;	/* The list of invariant ssa_vars that have to be
			   preserved when representing iv_use with iv_cand.  */
  bitmap inv_exprs;	/* The list of newly created invariant expressions
			   when representing iv_use with iv_cand.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};
/* Use of an induction variable.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned group_id;	/* The group id the use belongs to.  */
  enum use_type type;	/* Type of the use.  */
  tree mem_type;	/* The memory type to use when testing whether an
			   address is legitimate, and what the address's
			   cost is.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple *stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */

  tree addr_base;	/* Base address with const offset stripped.  */
  poly_uint64_pod addr_offset;
			/* Const offset stripped from base address.  */
};
/* Group of uses.  */
struct iv_group
{
  /* The id of the group.  */
  unsigned id;
  /* Uses of the group are of the same type.  */
  enum use_type type;
  /* The set of "related" IV candidates, plus the important ones.  */
  bitmap related_cands;
  /* Number of IV candidates in the cost_map.  */
  unsigned n_map_members;
  /* The costs w.r.t. the iv candidates.  */
  struct cost_pair *cost_map;
  /* The selected candidate for the group.  */
  struct iv_cand *selected;
  /* Uses in the group.  */
  vec<struct iv_use *> vuses;
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple *incremented_at;/* For original biv, the statement where it is
			    incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap inv_vars;	/* The list of invariant ssa_vars used in step of the
			   iv.  */
  bitmap inv_exprs;	/* If step is more complicated than a single ssa_var,
			   handle it as a new invariant expression which will
			   be hoisted out of loop.  */
  struct iv *orig_iv;	/* The original iv if this cand is added from biv with
			   smaller type.  */
};
/* Hashtable entry for common candidate derived from iv uses.  */
struct iv_common_cand
{
  tree base;
  tree step;
  /* IV uses from which this common candidate is derived.  */
  auto_vec<struct iv_use *> uses;
  hashval_t hash;
};
/* Hashtable helpers.  */

struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
{
  static inline hashval_t hash (const iv_common_cand *);
  static inline bool equal (const iv_common_cand *, const iv_common_cand *);
};
/* Hash function for possible common candidates.  */

inline hashval_t
iv_common_cand_hasher::hash (const iv_common_cand *ccand)
{
  return ccand->hash;
}

/* Hash table equality function for common candidates.  */

inline bool
iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
			      const iv_common_cand *ccand2)
{
  return (ccand1->hash == ccand2->hash
	  && operand_equal_p (ccand1->base, ccand2->base, 0)
	  && operand_equal_p (ccand1->step, ccand2->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
	      == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
}
/* Loop invariant expression hashtable entry.  */

struct iv_inv_expr_ent
{
  /* Tree expression of the entry.  */
  tree expr;
  /* Unique identifier.  */
  int id;
  /* Hash value.  */
  hashval_t hash;
};
/* Sort iv_inv_expr_ent pair A and B by id field.  */

static int
sort_iv_inv_expr_ent (const void *a, const void *b)
{
  const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
  const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);

  unsigned id1 = (*e1)->id;
  unsigned id2 = (*e2)->id;

  if (id1 < id2)
    return -1;
  else if (id1 > id2)
    return 1;
  else
    return 0;
}
/* Hashtable helpers.  */

struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
{
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};
/* Return true if uses of type TYPE represent some form of address.  */

static inline bool
address_p (use_type type)
{
  return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
}
/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
			   const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_group *> vgroups;

  /* The candidates.  */
  vec<iv_cand *> vcands;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The hashtable of common candidates derived from iv uses.  */
  hash_table<iv_common_cand_hasher> *iv_common_cand_tab;

  /* The common candidates.  */
  vec<iv_common_cand *> iv_common_cands;

  /* The maximum invariant variable id.  */
  unsigned max_inv_var_id;

  /* The maximum invariant expression id.  */
  unsigned max_inv_expr_id;

  /* Number of no_overflow BIVs which are not used in memory address.  */
  unsigned bivs_not_used_in_addr;

  /* Obstack for iv structure.  */
  struct obstack iv_obstack;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_groups;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_group;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* The number of invariants needed, including both invariant variants and
     invariant expressions.  */
  unsigned n_invs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  int64_t cand_cost;

  /* Number of times each invariant variable is used.  */
  unsigned *n_inv_var_uses;

  /* Number of times each invariant expression is used.  */
  unsigned *n_inv_expr_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed group.  */
  struct iv_group *group;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next;
};
/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_GROUPS \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
716 /* Dumps information about the induction variable IV to FILE. Don't dump
717 variable's name if DUMP_NAME is FALSE. The information is dumped with
718 preceding spaces indicated by INDENT_LEVEL. */
721 dump_iv (FILE *file
, struct iv
*iv
, bool dump_name
, unsigned indent_level
)
724 const char spaces
[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
726 if (indent_level
> 4)
728 p
= spaces
+ 8 - (indent_level
<< 1);
730 fprintf (file
, "%sIV struct:\n", p
);
731 if (iv
->ssa_name
&& dump_name
)
733 fprintf (file
, "%s SSA_NAME:\t", p
);
734 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
735 fprintf (file
, "\n");
738 fprintf (file
, "%s Type:\t", p
);
739 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
740 fprintf (file
, "\n");
742 fprintf (file
, "%s Base:\t", p
);
743 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
744 fprintf (file
, "\n");
746 fprintf (file
, "%s Step:\t", p
);
747 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
748 fprintf (file
, "\n");
752 fprintf (file
, "%s Object:\t", p
);
753 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
754 fprintf (file
, "\n");
757 fprintf (file
, "%s Biv:\t%c\n", p
, iv
->biv_p
? 'Y' : 'N');
759 fprintf (file
, "%s Overflowness wrto loop niter:\t%s\n",
760 p
, iv
->no_overflow
? "No-overflow" : "Overflow");
763 /* Dumps information about the USE to FILE. */
766 dump_use (FILE *file
, struct iv_use
*use
)
768 fprintf (file
, " Use %d.%d:\n", use
->group_id
, use
->id
);
769 fprintf (file
, " At stmt:\t");
770 print_gimple_stmt (file
, use
->stmt
, 0);
771 fprintf (file
, " At pos:\t");
773 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
774 fprintf (file
, "\n");
775 dump_iv (file
, use
->iv
, false, 2);
778 /* Dumps information about the uses to FILE. */
781 dump_groups (FILE *file
, struct ivopts_data
*data
)
784 struct iv_group
*group
;
786 for (i
= 0; i
< data
->vgroups
.length (); i
++)
788 group
= data
->vgroups
[i
];
789 fprintf (file
, "Group %d:\n", group
->id
);
790 if (group
->type
== USE_NONLINEAR_EXPR
)
791 fprintf (file
, " Type:\tGENERIC\n");
792 else if (group
->type
== USE_REF_ADDRESS
)
793 fprintf (file
, " Type:\tREFERENCE ADDRESS\n");
794 else if (group
->type
== USE_PTR_ADDRESS
)
795 fprintf (file
, " Type:\tPOINTER ARGUMENT ADDRESS\n");
798 gcc_assert (group
->type
== USE_COMPARE
);
799 fprintf (file
, " Type:\tCOMPARE\n");
801 for (j
= 0; j
< group
->vuses
.length (); j
++)
802 dump_use (file
, group
->vuses
[j
]);
806 /* Dumps information about induction variable candidate CAND to FILE. */
809 dump_cand (FILE *file
, struct iv_cand
*cand
)
811 struct iv
*iv
= cand
->iv
;
813 fprintf (file
, "Candidate %d:\n", cand
->id
);
816 fprintf (file
, " Depend on inv.vars: ");
817 dump_bitmap (file
, cand
->inv_vars
);
821 fprintf (file
, " Depend on inv.exprs: ");
822 dump_bitmap (file
, cand
->inv_exprs
);
825 if (cand
->var_before
)
  fprintf (file, "  Var before: ");
828 print_generic_expr (file
, cand
->var_before
, TDF_SLIM
);
829 fprintf (file
, "\n");
833 fprintf (file
, " Var after: ");
834 print_generic_expr (file
, cand
->var_after
, TDF_SLIM
);
835 fprintf (file
, "\n");
841 fprintf (file
, " Incr POS: before exit test\n");
845 fprintf (file
, " Incr POS: before use %d\n", cand
->ainc_use
->id
);
849 fprintf (file
, " Incr POS: after use %d\n", cand
->ainc_use
->id
);
853 fprintf (file
, " Incr POS: at end\n");
857 fprintf (file
, " Incr POS: orig biv\n");
861 dump_iv (file
, iv
, false, 1);
864 /* Returns the info for ssa version VER. */
866 static inline struct version_info
*
867 ver_info (struct ivopts_data
*data
, unsigned ver
)
869 return data
->version_info
+ ver
;
872 /* Returns the info for ssa name NAME. */
874 static inline struct version_info
*
875 name_info (struct ivopts_data
*data
, tree name
)
877 return ver_info (data
, SSA_NAME_VERSION (name
));
880 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
884 stmt_after_ip_normal_pos (struct loop
*loop
, gimple
*stmt
)
886 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
890 if (sbb
== loop
->latch
)
896 return stmt
== last_stmt (bb
);
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */
904 stmt_after_inc_pos (struct iv_cand
*cand
, gimple
*stmt
, bool true_if_equal
)
906 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
907 basic_block stmt_bb
= gimple_bb (stmt
);
909 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
912 if (stmt_bb
!= cand_bb
)
916 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
918 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */
925 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple
*stmt
)
933 return stmt_after_ip_normal_pos (loop
, stmt
);
937 return stmt_after_inc_pos (cand
, stmt
, false);
940 return stmt_after_inc_pos (cand
, stmt
, true);
947 /* walk_tree callback for contains_abnormal_ssa_name_p. */
950 contains_abnormal_ssa_name_p_1 (tree
*tp
, int *walk_subtrees
, void *)
952 if (TREE_CODE (*tp
) == SSA_NAME
953 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp
))
962 /* Returns true if EXPR contains a ssa name that occurs in an
963 abnormal phi node. */
966 contains_abnormal_ssa_name_p (tree expr
)
968 return walk_tree_without_duplicates
969 (&expr
, contains_abnormal_ssa_name_p_1
, NULL
) != NULL_TREE
;
972 /* Returns the structure describing number of iterations determined from
973 EXIT of DATA->current_loop, or NULL if something goes wrong. */
975 static struct tree_niter_desc
*
976 niter_for_exit (struct ivopts_data
*data
, edge exit
)
978 struct tree_niter_desc
*desc
;
979 tree_niter_desc
**slot
;
983 data
->niters
= new hash_map
<edge
, tree_niter_desc
*>;
987 slot
= data
->niters
->get (exit
);
991 /* Try to determine number of iterations. We cannot safely work with ssa
992 names that appear in phi nodes on abnormal edges, so that we do not
993 create overlapping life ranges for them (PR 27283). */
994 desc
= XNEW (struct tree_niter_desc
);
995 if (!number_of_iterations_exit (data
->current_loop
,
997 || contains_abnormal_ssa_name_p (desc
->niter
))
1002 data
->niters
->put (exit
, desc
);
1010 /* Returns the structure describing number of iterations determined from
1011 single dominating exit of DATA->current_loop, or NULL if something
1014 static struct tree_niter_desc
*
1015 niter_for_single_dom_exit (struct ivopts_data
*data
)
1017 edge exit
= single_dom_exit (data
->current_loop
);
1022 return niter_for_exit (data
, exit
);
1025 /* Initializes data structures used by the iv optimization pass, stored
1029 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
1031 data
->version_info_size
= 2 * num_ssa_names
;
1032 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
1033 data
->relevant
= BITMAP_ALLOC (NULL
);
1034 data
->important_candidates
= BITMAP_ALLOC (NULL
);
1035 data
->max_inv_var_id
= 0;
1036 data
->max_inv_expr_id
= 0;
1037 data
->niters
= NULL
;
1038 data
->vgroups
.create (20);
1039 data
->vcands
.create (20);
1040 data
->inv_expr_tab
= new hash_table
<iv_inv_expr_hasher
> (10);
1041 data
->name_expansion_cache
= NULL
;
1042 data
->iv_common_cand_tab
= new hash_table
<iv_common_cand_hasher
> (10);
1043 data
->iv_common_cands
.create (20);
1044 decl_rtl_to_reset
.create (20);
1045 gcc_obstack_init (&data
->iv_obstack
);
1048 /* Returns a memory object to that EXPR points. In case we are able to
1049 determine that it does not point to any such object, NULL is returned. */
1052 determine_base_object (tree expr
)
1054 enum tree_code code
= TREE_CODE (expr
);
1057 /* If this is a pointer casted to any type, we need to determine
1058 the base object for the pointer; so handle conversions before
1059 throwing away non-pointer expressions. */
1060 if (CONVERT_EXPR_P (expr
))
1061 return determine_base_object (TREE_OPERAND (expr
, 0));
1063 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
1072 obj
= TREE_OPERAND (expr
, 0);
1073 base
= get_base_address (obj
);
1078 if (TREE_CODE (base
) == MEM_REF
)
1079 return determine_base_object (TREE_OPERAND (base
, 0));
1081 return fold_convert (ptr_type_node
,
1082 build_fold_addr_expr (base
));
1084 case POINTER_PLUS_EXPR
:
1085 return determine_base_object (TREE_OPERAND (expr
, 0));
1089 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
1093 if (POLY_INT_CST_P (expr
))
1095 return fold_convert (ptr_type_node
, expr
);
1099 /* Return true if address expression with non-DECL_P operand appears
1103 contain_complex_addr_expr (tree expr
)
1108 switch (TREE_CODE (expr
))
1110 case POINTER_PLUS_EXPR
:
1113 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 0));
1114 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 1));
1118 return (!DECL_P (TREE_OPERAND (expr
, 0)));
1127 /* Allocates an induction variable with given initial value BASE and step STEP
1128 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1131 alloc_iv (struct ivopts_data
*data
, tree base
, tree step
,
1132 bool no_overflow
= false)
1135 struct iv
*iv
= (struct iv
*) obstack_alloc (&data
->iv_obstack
,
1136 sizeof (struct iv
));
1137 gcc_assert (step
!= NULL_TREE
);
1139 /* Lower address expression in base except ones with DECL_P as operand.
1141 1) More accurate cost can be computed for address expressions;
1142 2) Duplicate candidates won't be created for bases in different
1143 forms, like &a[0] and &a. */
1145 if ((TREE_CODE (expr
) == ADDR_EXPR
&& !DECL_P (TREE_OPERAND (expr
, 0)))
1146 || contain_complex_addr_expr (expr
))
1149 tree_to_aff_combination (expr
, TREE_TYPE (expr
), &comb
);
1150 base
= fold_convert (TREE_TYPE (base
), aff_combination_to_tree (&comb
));
1154 iv
->base_object
= determine_base_object (base
);
1157 iv
->nonlin_use
= NULL
;
1158 iv
->ssa_name
= NULL_TREE
;
1160 && !iv_can_overflow_p (data
->current_loop
, TREE_TYPE (base
),
1163 iv
->no_overflow
= no_overflow
;
1164 iv
->have_address_use
= false;
1169 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1170 doesn't overflow. */
1173 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
,
1176 struct version_info
*info
= name_info (data
, iv
);
1178 gcc_assert (!info
->iv
);
1180 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
1181 info
->iv
= alloc_iv (data
, base
, step
, no_overflow
);
1182 info
->iv
->ssa_name
= iv
;
1185 /* Finds induction variable declaration for VAR. */
1188 get_iv (struct ivopts_data
*data
, tree var
)
1191 tree type
= TREE_TYPE (var
);
1193 if (!POINTER_TYPE_P (type
)
1194 && !INTEGRAL_TYPE_P (type
))
1197 if (!name_info (data
, var
)->iv
)
1199 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1202 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
1203 set_iv (data
, var
, var
, build_int_cst (type
, 0), true);
1206 return name_info (data
, var
)->iv
;
1209 /* Return the first non-invariant ssa var found in EXPR. */
1212 extract_single_var_from_expr (tree expr
)
1216 enum tree_code code
;
1218 if (!expr
|| is_gimple_min_invariant (expr
))
1221 code
= TREE_CODE (expr
);
1222 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code
)))
1224 n
= TREE_OPERAND_LENGTH (expr
);
1225 for (i
= 0; i
< n
; i
++)
1227 tmp
= extract_single_var_from_expr (TREE_OPERAND (expr
, i
));
1233 return (TREE_CODE (expr
) == SSA_NAME
) ? expr
: NULL
;
1236 /* Finds basic ivs. */
1239 find_bivs (struct ivopts_data
*data
)
1243 tree step
, type
, base
, stop
;
1245 struct loop
*loop
= data
->current_loop
;
1248 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1252 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
1255 if (virtual_operand_p (PHI_RESULT (phi
)))
1258 if (!simple_iv (loop
, loop
, PHI_RESULT (phi
), &iv
, true))
1261 if (integer_zerop (iv
.step
))
1265 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
      /* Stop expanding iv base at the first ssa var referred by iv step.
	 Ideally we should stop at any ssa var, but because that is expensive
	 and rarely needed, we just do it on the first one.

	 See PR64705 for the rationale.  */
1271 stop
= extract_single_var_from_expr (step
);
1272 base
= expand_simple_operations (base
, stop
);
1273 if (contains_abnormal_ssa_name_p (base
)
1274 || contains_abnormal_ssa_name_p (step
))
1277 type
= TREE_TYPE (PHI_RESULT (phi
));
1278 base
= fold_convert (type
, base
);
1281 if (POINTER_TYPE_P (type
))
1282 step
= convert_to_ptrofftype (step
);
1284 step
= fold_convert (type
, step
);
1287 set_iv (data
, PHI_RESULT (phi
), base
, step
, iv
.no_overflow
);
1294 /* Marks basic ivs. */
1297 mark_bivs (struct ivopts_data
*data
)
1302 struct iv
*iv
, *incr_iv
;
1303 struct loop
*loop
= data
->current_loop
;
1304 basic_block incr_bb
;
1307 data
->bivs_not_used_in_addr
= 0;
1308 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1312 iv
= get_iv (data
, PHI_RESULT (phi
));
1316 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1317 def
= SSA_NAME_DEF_STMT (var
);
1318 /* Don't mark iv peeled from other one as biv. */
1320 && gimple_code (def
) == GIMPLE_PHI
1321 && gimple_bb (def
) == loop
->header
)
1324 incr_iv
= get_iv (data
, var
);
1328 /* If the increment is in the subloop, ignore it. */
1329 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1330 if (incr_bb
->loop_father
!= data
->current_loop
1331 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1335 incr_iv
->biv_p
= true;
1336 if (iv
->no_overflow
)
1337 data
->bivs_not_used_in_addr
++;
1338 if (incr_iv
->no_overflow
)
1339 data
->bivs_not_used_in_addr
++;
1343 /* Checks whether STMT defines a linear induction variable and stores its
1344 parameters to IV. */
1347 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple
*stmt
, affine_iv
*iv
)
1350 struct loop
*loop
= data
->current_loop
;
1352 iv
->base
= NULL_TREE
;
1353 iv
->step
= NULL_TREE
;
1355 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1358 lhs
= gimple_assign_lhs (stmt
);
1359 if (TREE_CODE (lhs
) != SSA_NAME
)
1362 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
  /* Stop expanding iv base at the first ssa var referred by iv step.
     Ideally we should stop at any ssa var, but because that is expensive
     and rarely needed, we just do it on the first one.

     See PR64705 for the rationale.  */
1370 stop
= extract_single_var_from_expr (iv
->step
);
1371 iv
->base
= expand_simple_operations (iv
->base
, stop
);
1372 if (contains_abnormal_ssa_name_p (iv
->base
)
1373 || contains_abnormal_ssa_name_p (iv
->step
))
1376 /* If STMT could throw, then do not consider STMT as defining a GIV.
1377 While this will suppress optimizations, we cannot safely delete this
1378 GIV and associated statements, even if it appears it is not used. */
1379 if (stmt_could_throw_p (cfun
, stmt
))
1385 /* Finds general ivs in statement STMT. */
1388 find_givs_in_stmt (struct ivopts_data
*data
, gimple
*stmt
)
1392 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1395 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
, iv
.no_overflow
);
1398 /* Finds general ivs in basic block BB. */
1401 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1403 gimple_stmt_iterator bsi
;
1405 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1406 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1409 /* Finds general ivs. */
1412 find_givs (struct ivopts_data
*data
)
1414 struct loop
*loop
= data
->current_loop
;
1415 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1418 for (i
= 0; i
< loop
->num_nodes
; i
++)
1419 find_givs_in_bb (data
, body
[i
]);
1423 /* For each ssa name defined in LOOP determines whether it is an induction
1424 variable and if so, its initial value and step. */
1427 find_induction_variables (struct ivopts_data
*data
)
1432 if (!find_bivs (data
))
1438 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1440 struct tree_niter_desc
*niter
= niter_for_single_dom_exit (data
);
1444 fprintf (dump_file
, " number of iterations ");
1445 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1446 if (!integer_zerop (niter
->may_be_zero
))
1448 fprintf (dump_file
, "; zero if ");
1449 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1451 fprintf (dump_file
, "\n");
1454 fprintf (dump_file
, "\n<Induction Vars>:\n");
1455 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1457 struct version_info
*info
= ver_info (data
, i
);
1458 if (info
->iv
&& info
->iv
->step
&& !integer_zerop (info
->iv
->step
))
1459 dump_iv (dump_file
, ver_info (data
, i
)->iv
, true, 0);
1466 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1467 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1468 is the const offset stripped from IV base and MEM_TYPE is the type
1469 of the memory being addressed. For uses of other types, ADDR_BASE
1470 and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1472 static struct iv_use
*
1473 record_use (struct iv_group
*group
, tree
*use_p
, struct iv
*iv
,
1474 gimple
*stmt
, enum use_type type
, tree mem_type
,
1475 tree addr_base
, poly_uint64 addr_offset
)
1477 struct iv_use
*use
= XCNEW (struct iv_use
);
1479 use
->id
= group
->vuses
.length ();
1480 use
->group_id
= group
->id
;
1482 use
->mem_type
= mem_type
;
1486 use
->addr_base
= addr_base
;
1487 use
->addr_offset
= addr_offset
;
1489 group
->vuses
.safe_push (use
);
1493 /* Checks whether OP is a loop-level invariant and if so, records it.
1494 NONLINEAR_USE is true if the invariant is used in a way we do not
1495 handle specially. */
1498 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1501 struct version_info
*info
;
1503 if (TREE_CODE (op
) != SSA_NAME
1504 || virtual_operand_p (op
))
1507 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1509 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1512 info
= name_info (data
, op
);
1514 info
->has_nonlin_use
|= nonlinear_use
;
1516 info
->inv_id
= ++data
->max_inv_var_id
;
1517 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1520 /* Record a group of TYPE. */
1522 static struct iv_group
*
1523 record_group (struct ivopts_data
*data
, enum use_type type
)
1525 struct iv_group
*group
= XCNEW (struct iv_group
);
1527 group
->id
= data
->vgroups
.length ();
1529 group
->related_cands
= BITMAP_ALLOC (NULL
);
1530 group
->vuses
.create (1);
1532 data
->vgroups
.safe_push (group
);
1536 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1537 New group will be created if there is no existing group for the use.
1538 MEM_TYPE is the type of memory being addressed, or NULL if this
1539 isn't an address reference. */
1541 static struct iv_use
*
1542 record_group_use (struct ivopts_data
*data
, tree
*use_p
,
1543 struct iv
*iv
, gimple
*stmt
, enum use_type type
,
1546 tree addr_base
= NULL
;
1547 struct iv_group
*group
= NULL
;
1548 poly_uint64 addr_offset
= 0;
1550 /* Record non address type use in a new group. */
1551 if (address_p (type
))
1555 addr_base
= strip_offset (iv
->base
, &addr_offset
);
1556 for (i
= 0; i
< data
->vgroups
.length (); i
++)
1560 group
= data
->vgroups
[i
];
1561 use
= group
->vuses
[0];
1562 if (!address_p (use
->type
))
1565 /* Check if it has the same stripped base and step. */
1566 if (operand_equal_p (iv
->base_object
, use
->iv
->base_object
, 0)
1567 && operand_equal_p (iv
->step
, use
->iv
->step
, 0)
1568 && operand_equal_p (addr_base
, use
->addr_base
, 0))
1571 if (i
== data
->vgroups
.length ())
1576 group
= record_group (data
, type
);
1578 return record_use (group
, use_p
, iv
, stmt
, type
, mem_type
,
1579 addr_base
, addr_offset
);
1582 /* Checks whether the use OP is interesting and if so, records it. */
1584 static struct iv_use
*
1585 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1591 if (TREE_CODE (op
) != SSA_NAME
)
1594 iv
= get_iv (data
, op
);
1600 gcc_assert (iv
->nonlin_use
->type
== USE_NONLINEAR_EXPR
);
1601 return iv
->nonlin_use
;
1604 if (integer_zerop (iv
->step
))
1606 record_invariant (data
, op
, true);
1610 stmt
= SSA_NAME_DEF_STMT (op
);
1611 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
|| is_gimple_assign (stmt
));
1613 use
= record_group_use (data
, NULL
, iv
, stmt
, USE_NONLINEAR_EXPR
, NULL_TREE
);
1614 iv
->nonlin_use
= use
;
/* Indicate how compare type iv_use can be handled.  */
enum comp_iv_rewrite
{
  COMP_IV_NA,
  /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
  COMP_IV_EXPR,
  /* We may rewrite compare type iv_uses on both sides of comparison by
     expressing value of each iv_use.  */
  COMP_IV_EXPR_2,
  /* We may rewrite compare type iv_use by expressing value of the iv_use
     or by eliminating it with other iv_cand.  */
  COMP_IV_ELIM
};
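/* For example (illustrative only), if both "i" and "j" are induction
   variables, the exit test "i < j" can only be rewritten by expressing
   the value of each iv (COMP_IV_EXPR_2), while "i < n" with loop
   invariant "n" may either be rewritten that way or be eliminated
   entirely in favour of a test on another candidate (COMP_IV_ELIM).  */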
1632 /* Given a condition in statement STMT, checks whether it is a compare
1633 of an induction variable and an invariant. If this is the case,
1634 CONTROL_VAR is set to location of the iv, BOUND to the location of
1635 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1636 induction variable descriptions, and true is returned. If this is not
1637 the case, CONTROL_VAR and BOUND are set to the arguments of the
1638 condition and false is returned. */
1640 static enum comp_iv_rewrite
1641 extract_cond_operands (struct ivopts_data
*data
, gimple
*stmt
,
1642 tree
**control_var
, tree
**bound
,
1643 struct iv
**iv_var
, struct iv
**iv_bound
)
1645 /* The objects returned when COND has constant operands. */
1646 static struct iv const_iv
;
1648 tree
*op0
= &zero
, *op1
= &zero
;
1649 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
;
1650 enum comp_iv_rewrite rewrite_type
= COMP_IV_NA
;
1652 if (gimple_code (stmt
) == GIMPLE_COND
)
1654 gcond
*cond_stmt
= as_a
<gcond
*> (stmt
);
1655 op0
= gimple_cond_lhs_ptr (cond_stmt
);
1656 op1
= gimple_cond_rhs_ptr (cond_stmt
);
1660 op0
= gimple_assign_rhs1_ptr (stmt
);
1661 op1
= gimple_assign_rhs2_ptr (stmt
);
1664 zero
= integer_zero_node
;
1665 const_iv
.step
= integer_zero_node
;
1667 if (TREE_CODE (*op0
) == SSA_NAME
)
1668 iv0
= get_iv (data
, *op0
);
1669 if (TREE_CODE (*op1
) == SSA_NAME
)
1670 iv1
= get_iv (data
, *op1
);
  /* If both sides of the comparison are IVs, we can express ivs on both ends.  */
1673 if (iv0
&& iv1
&& !integer_zerop (iv0
->step
) && !integer_zerop (iv1
->step
))
1675 rewrite_type
= COMP_IV_EXPR_2
;
  /* If neither side of the comparison is an IV.  */
1680 if ((!iv0
|| integer_zerop (iv0
->step
))
1681 && (!iv1
|| integer_zerop (iv1
->step
)))
1684 /* Control variable may be on the other side. */
1685 if (!iv0
|| integer_zerop (iv0
->step
))
1687 std::swap (op0
, op1
);
1688 std::swap (iv0
, iv1
);
1690 /* If one side is IV and the other side isn't loop invariant. */
1692 rewrite_type
= COMP_IV_EXPR
;
1693 /* If one side is IV and the other side is loop invariant. */
1694 else if (!integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
))
1695 rewrite_type
= COMP_IV_ELIM
;
1707 return rewrite_type
;
1710 /* Checks whether the condition in STMT is interesting and if so,
1714 find_interesting_uses_cond (struct ivopts_data
*data
, gimple
*stmt
)
1716 tree
*var_p
, *bound_p
;
1717 struct iv
*var_iv
, *bound_iv
;
1718 enum comp_iv_rewrite ret
;
1720 ret
= extract_cond_operands (data
, stmt
,
1721 &var_p
, &bound_p
, &var_iv
, &bound_iv
);
1722 if (ret
== COMP_IV_NA
)
1724 find_interesting_uses_op (data
, *var_p
);
1725 find_interesting_uses_op (data
, *bound_p
);
1729 record_group_use (data
, var_p
, var_iv
, stmt
, USE_COMPARE
, NULL_TREE
);
1730 /* Record compare type iv_use for iv on the other side of comparison. */
1731 if (ret
== COMP_IV_EXPR_2
)
1732 record_group_use (data
, bound_p
, bound_iv
, stmt
, USE_COMPARE
, NULL_TREE
);
1735 /* Returns the outermost loop EXPR is obviously invariant in
1736 relative to the loop LOOP, i.e. if all its operands are defined
1737 outside of the returned loop. Returns NULL if EXPR is not
1738 even obviously invariant in LOOP. */
1741 outermost_invariant_loop_for_expr (struct loop
*loop
, tree expr
)
1746 if (is_gimple_min_invariant (expr
))
1747 return current_loops
->tree_root
;
1749 if (TREE_CODE (expr
) == SSA_NAME
)
1751 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1754 if (flow_bb_inside_loop_p (loop
, def_bb
))
1756 return superloop_at_depth (loop
,
1757 loop_depth (def_bb
->loop_father
) + 1);
1760 return current_loops
->tree_root
;
1766 unsigned maxdepth
= 0;
1767 len
= TREE_OPERAND_LENGTH (expr
);
1768 for (i
= 0; i
< len
; i
++)
1770 struct loop
*ivloop
;
1771 if (!TREE_OPERAND (expr
, i
))
1774 ivloop
= outermost_invariant_loop_for_expr (loop
, TREE_OPERAND (expr
, i
));
1777 maxdepth
= MAX (maxdepth
, loop_depth (ivloop
));
1780 return superloop_at_depth (loop
, maxdepth
);
1783 /* Returns true if expression EXPR is obviously invariant in LOOP,
1784 i.e. if all its operands are defined outside of the LOOP. LOOP
1785 should not be the function body. */
1788 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1793 gcc_assert (loop_depth (loop
) > 0);
1795 if (is_gimple_min_invariant (expr
))
1798 if (TREE_CODE (expr
) == SSA_NAME
)
1800 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1802 && flow_bb_inside_loop_p (loop
, def_bb
))
1811 len
= TREE_OPERAND_LENGTH (expr
);
1812 for (i
= 0; i
< len
; i
++)
1813 if (TREE_OPERAND (expr
, i
)
1814 && !expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1820 /* Given expression EXPR which computes inductive values with respect
1821 to loop recorded in DATA, this function returns biv from which EXPR
1822 is derived by tracing definition chains of ssa variables in EXPR. */
1825 find_deriving_biv_for_expr (struct ivopts_data
*data
, tree expr
)
1830 enum tree_code code
;
1833 if (expr
== NULL_TREE
)
1836 if (is_gimple_min_invariant (expr
))
1839 code
= TREE_CODE (expr
);
1840 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code
)))
1842 n
= TREE_OPERAND_LENGTH (expr
);
1843 for (i
= 0; i
< n
; i
++)
1845 iv
= find_deriving_biv_for_expr (data
, TREE_OPERAND (expr
, i
));
1851 /* Stop if it's not ssa name. */
1852 if (code
!= SSA_NAME
)
1855 iv
= get_iv (data
, expr
);
1856 if (!iv
|| integer_zerop (iv
->step
))
1861 stmt
= SSA_NAME_DEF_STMT (expr
);
1862 if (gphi
*phi
= dyn_cast
<gphi
*> (stmt
))
1865 use_operand_p use_p
;
1866 basic_block phi_bb
= gimple_bb (phi
);
1868 /* Skip loop header PHI that doesn't define biv. */
1869 if (phi_bb
->loop_father
== data
->current_loop
)
1872 if (virtual_operand_p (gimple_phi_result (phi
)))
1875 FOR_EACH_PHI_ARG (use_p
, phi
, iter
, SSA_OP_USE
)
1877 tree use
= USE_FROM_PTR (use_p
);
1878 iv
= find_deriving_biv_for_expr (data
, use
);
1884 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1887 e1
= gimple_assign_rhs1 (stmt
);
1888 code
= gimple_assign_rhs_code (stmt
);
1889 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
)
1890 return find_deriving_biv_for_expr (data
, e1
);
1897 case POINTER_PLUS_EXPR
:
1898 /* Increments, decrements and multiplications by a constant
1900 e2
= gimple_assign_rhs2 (stmt
);
1901 iv
= find_deriving_biv_for_expr (data
, e2
);
1907 /* Casts are simple. */
1908 return find_deriving_biv_for_expr (data
, e1
);
1917 /* Record BIV, its predecessor and successor that they are used in
1918 address type uses. */
1921 record_biv_for_address_use (struct ivopts_data
*data
, struct iv
*biv
)
1924 tree type
, base_1
, base_2
;
1927 if (!biv
|| !biv
->biv_p
|| integer_zerop (biv
->step
)
1928 || biv
->have_address_use
|| !biv
->no_overflow
)
1931 type
= TREE_TYPE (biv
->base
);
1932 if (!INTEGRAL_TYPE_P (type
))
1935 biv
->have_address_use
= true;
1936 data
->bivs_not_used_in_addr
--;
1937 base_1
= fold_build2 (PLUS_EXPR
, type
, biv
->base
, biv
->step
);
1938 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1940 struct iv
*iv
= ver_info (data
, i
)->iv
;
1942 if (!iv
|| !iv
->biv_p
|| integer_zerop (iv
->step
)
1943 || iv
->have_address_use
|| !iv
->no_overflow
)
1946 if (type
!= TREE_TYPE (iv
->base
)
1947 || !INTEGRAL_TYPE_P (TREE_TYPE (iv
->base
)))
1950 if (!operand_equal_p (biv
->step
, iv
->step
, 0))
1953 base_2
= fold_build2 (PLUS_EXPR
, type
, iv
->base
, iv
->step
);
1954 if (operand_equal_p (base_1
, iv
->base
, 0)
1955 || operand_equal_p (base_2
, biv
->base
, 0))
1957 iv
->have_address_use
= true;
1958 data
->bivs_not_used_in_addr
--;
/* Accumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */
1967 struct ifs_ivopts_data
1969 struct ivopts_data
*ivopts_data
;
1975 idx_find_step (tree base
, tree
*idx
, void *data
)
1977 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1979 bool use_overflow_semantics
= false;
1980 tree step
, iv_base
, iv_step
, lbound
, off
;
1981 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1983 /* If base is a component ref, require that the offset of the reference
1985 if (TREE_CODE (base
) == COMPONENT_REF
)
1987 off
= component_ref_field_offset (base
);
1988 return expr_invariant_in_loop_p (loop
, off
);
1991 /* If base is array, first check whether we will be able to move the
1992 reference out of the loop (in order to take its address in strength
1993 reduction). In order for this to work we need both lower bound
1994 and step to be loop invariants. */
1995 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1997 /* Moreover, for a range, the size needs to be invariant as well. */
1998 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1999 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
2002 step
= array_ref_element_size (base
);
2003 lbound
= array_ref_low_bound (base
);
2005 if (!expr_invariant_in_loop_p (loop
, step
)
2006 || !expr_invariant_in_loop_p (loop
, lbound
))
2010 if (TREE_CODE (*idx
) != SSA_NAME
)
2013 iv
= get_iv (dta
->ivopts_data
, *idx
);
2017 /* XXX We produce for a base of *D42 with iv->base being &x[0]
2018 *&x[0], which is not folded and does not trigger the
2019 ARRAY_REF path below. */
2022 if (integer_zerop (iv
->step
))
2025 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
2027 step
= array_ref_element_size (base
);
2029 /* We only handle addresses whose step is an integer constant. */
2030 if (TREE_CODE (step
) != INTEGER_CST
)
2034 /* The step for pointer arithmetics already is 1 byte. */
2035 step
= size_one_node
;
2039 if (iv
->no_overflow
&& nowrap_type_p (TREE_TYPE (iv_step
)))
2040 use_overflow_semantics
= true;
2042 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
2043 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
2044 use_overflow_semantics
))
2046 /* The index might wrap. */
2050 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
2051 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
2053 if (dta
->ivopts_data
->bivs_not_used_in_addr
)
2056 iv
= find_deriving_biv_for_expr (dta
->ivopts_data
, iv
->ssa_name
);
2058 record_biv_for_address_use (dta
->ivopts_data
, iv
);
2063 /* Records use in index IDX. Callback for for_each_index. Ivopts data
2064 object is passed to it in DATA. */
2067 idx_record_use (tree base
, tree
*idx
,
2070 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
2071 find_interesting_uses_op (data
, *idx
);
2072 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
2074 find_interesting_uses_op (data
, array_ref_element_size (base
));
2075 find_interesting_uses_op (data
, array_ref_low_bound (base
));
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */
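/* For instance (an illustrative example), with TOP being "i * 8" and BOT
   being "i" the value 8 is stored to *MUL and true is returned, while
   TOP = 7 and BOT = 3 fails because the division leaves a remainder.  */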
2086 constant_multiple_of (tree top
, tree bot
, widest_int
*mul
)
2089 enum tree_code code
;
2090 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
2091 widest_int res
, p0
, p1
;
2096 if (operand_equal_p (top
, bot
, 0))
2102 code
= TREE_CODE (top
);
2106 mby
= TREE_OPERAND (top
, 1);
2107 if (TREE_CODE (mby
) != INTEGER_CST
)
2110 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
2113 *mul
= wi::sext (res
* wi::to_widest (mby
), precision
);
2118 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
2119 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
2122 if (code
== MINUS_EXPR
)
2124 *mul
= wi::sext (p0
+ p1
, precision
);
2128 if (TREE_CODE (bot
) != INTEGER_CST
)
2131 p0
= widest_int::from (wi::to_wide (top
), SIGNED
);
2132 p1
= widest_int::from (wi::to_wide (bot
), SIGNED
);
2135 *mul
= wi::sext (wi::divmod_trunc (p0
, p1
, SIGNED
, &res
), precision
);
2139 if (POLY_INT_CST_P (top
)
2140 && POLY_INT_CST_P (bot
)
2141 && constant_multiple_p (wi::to_poly_widest (top
),
2142 wi::to_poly_widest (bot
), mul
))
2149 /* Return true if memory reference REF with step STEP may be unaligned. */
2152 may_be_unaligned_p (tree ref
, tree step
)
2154 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2155 thus they are not misaligned. */
2156 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
2159 unsigned int align
= TYPE_ALIGN (TREE_TYPE (ref
));
2160 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
))) > align
)
2161 align
= GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
)));
2163 unsigned HOST_WIDE_INT bitpos
;
2164 unsigned int ref_align
;
2165 get_object_alignment_1 (ref
, &ref_align
, &bitpos
);
2166 if (ref_align
< align
2167 || (bitpos
% align
) != 0
2168 || (bitpos
% BITS_PER_UNIT
) != 0)
2171 unsigned int trailing_zeros
= tree_ctz (step
);
2172 if (trailing_zeros
< HOST_BITS_PER_INT
2173 && (1U << trailing_zeros
) * BITS_PER_UNIT
< align
)
2179 /* Return true if EXPR may be non-addressable. */
2182 may_be_nonaddressable_p (tree expr
)
2184 switch (TREE_CODE (expr
))
2187 /* Check if it's a register variable. */
2188 return DECL_HARD_REGISTER (expr
);
2190 case TARGET_MEM_REF
:
2191 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2192 target, thus they are always addressable. */
2196 /* Likewise for MEM_REFs, modulo the storage order. */
2197 return REF_REVERSE_STORAGE_ORDER (expr
);
2200 if (REF_REVERSE_STORAGE_ORDER (expr
))
2202 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
2205 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr
, 0))))
2207 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
2208 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
2211 case ARRAY_RANGE_REF
:
2212 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr
, 0))))
2214 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
2216 case VIEW_CONVERT_EXPR
:
2217 /* This kind of view-conversions may wrap non-addressable objects
2218 and make them look addressable. After some processing the
2219 non-addressability may be uncovered again, causing ADDR_EXPRs
2220 of inappropriate objects to be built. */
2221 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
2222 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
2224 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
2236 /* Finds addresses in *OP_P inside STMT. */
2239 find_interesting_uses_address (struct ivopts_data
*data
, gimple
*stmt
,
2242 tree base
= *op_p
, step
= size_zero_node
;
2244 struct ifs_ivopts_data ifs_ivopts_data
;
2246 /* Do not play with volatile memory references. A bit too conservative,
2247 perhaps, but safe. */
2248 if (gimple_has_volatile_ops (stmt
))
2251 /* Ignore bitfields for now. Not really something terribly complicated
2253 if (TREE_CODE (base
) == BIT_FIELD_REF
)
2256 base
= unshare_expr (base
);
2258 if (TREE_CODE (base
) == TARGET_MEM_REF
)
2260 tree type
= build_pointer_type (TREE_TYPE (base
));
2264 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
2266 civ
= get_iv (data
, TMR_BASE (base
));
2270 TMR_BASE (base
) = civ
->base
;
2273 if (TMR_INDEX2 (base
)
2274 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
2276 civ
= get_iv (data
, TMR_INDEX2 (base
));
2280 TMR_INDEX2 (base
) = civ
->base
;
2283 if (TMR_INDEX (base
)
2284 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
2286 civ
= get_iv (data
, TMR_INDEX (base
));
2290 TMR_INDEX (base
) = civ
->base
;
2295 if (TMR_STEP (base
))
2296 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
2298 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
2302 if (integer_zerop (step
))
2304 base
= tree_mem_ref_addr (type
, base
);
2308 ifs_ivopts_data
.ivopts_data
= data
;
2309 ifs_ivopts_data
.stmt
= stmt
;
2310 ifs_ivopts_data
.step
= size_zero_node
;
2311 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
2312 || integer_zerop (ifs_ivopts_data
.step
))
2314 step
= ifs_ivopts_data
.step
;
2316 /* Check that the base expression is addressable. This needs
2317 to be done after substituting bases of IVs into it. */
2318 if (may_be_nonaddressable_p (base
))
2321 /* Moreover, on strict alignment platforms, check that it is
2322 sufficiently aligned. */
2323 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
2326 base
= build_fold_addr_expr (base
);
2328 /* Substituting bases of IVs into the base expression might
2329 have caused folding opportunities. */
2330 if (TREE_CODE (base
) == ADDR_EXPR
)
2332 tree
*ref
= &TREE_OPERAND (base
, 0);
2333 while (handled_component_p (*ref
))
2334 ref
= &TREE_OPERAND (*ref
, 0);
2335 if (TREE_CODE (*ref
) == MEM_REF
)
2337 tree tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
2338 TREE_OPERAND (*ref
, 0),
2339 TREE_OPERAND (*ref
, 1));
2346 civ
= alloc_iv (data
, base
, step
);
2347 /* Fail if base object of this memory reference is unknown. */
2348 if (civ
->base_object
== NULL_TREE
)
2351 record_group_use (data
, op_p
, civ
, stmt
, USE_REF_ADDRESS
, TREE_TYPE (*op_p
));
2355 for_each_index (op_p
, idx_record_use
, data
);
2358 /* Finds and records invariants used in STMT. */
2361 find_invariants_stmt (struct ivopts_data
*data
, gimple
*stmt
)
2364 use_operand_p use_p
;
2367 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2369 op
= USE_FROM_PTR (use_p
);
2370 record_invariant (data
, op
, false);
/* CALL calls an internal function.  If operand *OP_P will become an
   address when the call is expanded, return the type of the memory
   being addressed, otherwise return null.  */

static tree
get_mem_type_for_internal_fn (gcall *call, tree *op_p)
{
  switch (gimple_call_internal_fn (call))
    {
    case IFN_MASK_LOAD:
    case IFN_MASK_LOAD_LANES:
      if (op_p == gimple_call_arg_ptr (call, 0))
        return TREE_TYPE (gimple_call_lhs (call));
      return NULL_TREE;

    case IFN_MASK_STORE:
    case IFN_MASK_STORE_LANES:
      if (op_p == gimple_call_arg_ptr (call, 0))
        return TREE_TYPE (gimple_call_arg (call, 3));
      return NULL_TREE;

    default:
      return NULL_TREE;
    }
}
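/* Note on the masked load/store cases above: operand 0 of these internal
   calls is the pointer that turns into a memory address once the call is
   expanded, which is why only gimple_call_arg_ptr (call, 0) is treated as
   an address-like operand here.  */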
/* IV is a (non-address) iv that describes operand *OP_P of STMT.
   Return true if the operand will become an address when STMT
   is expanded and record the associated address use if so.  */

static bool
find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
                       struct iv *iv)
{
  /* Fail if base object of this memory reference is unknown.  */
  if (iv->base_object == NULL_TREE)
    return false;

  tree mem_type = NULL_TREE;
  if (gcall *call = dyn_cast <gcall *> (stmt))
    if (gimple_call_internal_p (call))
      mem_type = get_mem_type_for_internal_fn (call, op_p);
  if (mem_type)
    {
      iv = alloc_iv (data, iv->base, iv->step);
      record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
      return true;
    }
  return false;
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      if (!find_address_like_use (data, stmt, use_p->use, iv))
        find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gphi *phi;
  gphi_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
        find_interesting_uses_op (data, def);
    }
}
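/* addr_offset_valid_p below keeps one (register + offset) PLUS rtx per
   (address space, memory mode) pair in the addr_list cache, so that repeated
   offset-validity queries only update the constant operand instead of
   regenerating the RTL template each time.  */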
/* Return TRUE if OFFSET is within the range of [base + offset] addressing
   mode for memory reference represented by USE.  */

static GTY (()) vec<rtx, va_gc> *addr_list;

static bool
addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
{
  rtx reg, addr;
  unsigned list_index;
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
  machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);

  list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
  if (list_index >= vec_safe_length (addr_list))
    vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);

  addr = (*addr_list)[list_index];
  if (!addr)
    {
      addr_mode = targetm.addr_space.address_mode (as);
      reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
      addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
      (*addr_list)[list_index] = addr;
    }
  else
    addr_mode = GET_MODE (addr);

  XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
  return (memory_address_addr_space_p (mem_mode, addr, as));
}
/* Comparison function to sort group in ascending order of addr_offset.  */

static int
group_compare_offset (const void *a, const void *b)
{
  const struct iv_use *const *u1 = (const struct iv_use *const *) a;
  const struct iv_use *const *u2 = (const struct iv_use *const *) b;

  return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
}
/* Check if small groups should be split.  Return true if no group
   contains more than two uses with distinct addr_offsets.  Return
   false otherwise.  We want to split such groups because:

     1) Small groups don't have much benefit and may interfere with
        general candidate selection.
     2) Size for problem with only small groups is usually small and
        general algorithm can handle it well.

   TODO -- Above claim may not hold when we want to merge memory
   accesses with consecutive addresses.  */

static bool
split_small_address_groups_p (struct ivopts_data *data)
{
  unsigned int i, j, distinct = 1;
  struct iv_use *pre;
  struct iv_group *group;

  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];
      if (group->vuses.length () == 1)
        continue;

      gcc_assert (address_p (group->type));
      if (group->vuses.length () == 2)
        {
          if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
                                      group->vuses[1]->addr_offset) > 0)
            std::swap (group->vuses[0], group->vuses[1]);
        }
      else
        group->vuses.qsort (group_compare_offset);

      if (distinct > 2)
        continue;

      distinct = 1;
      for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
        {
          if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
            {
              pre = group->vuses[j];
              distinct++;
            }

          if (distinct > 2)
            break;
        }
    }

  return (distinct <= 2);
}
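/* As an example of the splitting below: if a group contains address uses at
   offsets 0, 4 and 1000000 from the same base, and the target's
   [base + offset] addressing mode only accepts small offsets, the use at
   offset 1000000 is moved into a new group while the first two stay
   together.  The concrete offsets here are only illustrative.  */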
/* For each group of address type uses, this function further groups
   these uses according to the maximum offset supported by target's
   [base + offset] addressing mode.  */

static void
split_address_groups (struct ivopts_data *data)
{
  unsigned int i, j;
  /* Always split group.  */
  bool split_p = split_small_address_groups_p (data);

  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *new_group = NULL;
      struct iv_group *group = data->vgroups[i];
      struct iv_use *use = group->vuses[0];

      use->id = 0;
      use->group_id = group->id;
      if (group->vuses.length () == 1)
        continue;

      gcc_assert (address_p (use->type));

      for (j = 1; j < group->vuses.length ();)
        {
          struct iv_use *next = group->vuses[j];
          poly_int64 offset = next->addr_offset - use->addr_offset;

          /* Split group if asked to, or the offset against the first
             use can't fit in offset part of addressing mode.  IV uses
             having the same offset are still kept in one group.  */
          if (maybe_ne (offset, 0)
              && (split_p || !addr_offset_valid_p (use, offset)))
            {
              if (!new_group)
                new_group = record_group (data, group->type);
              group->vuses.ordered_remove (j);
              new_group->vuses.safe_push (next);
              continue;
            }

          next->id = j;
          next->group_id = group->id;
          j++;
        }
    }
}
/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  edge e;

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }
  free (body);

  split_address_groups (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\n<IV Groups>:\n");
      dump_groups (dump_file, data);
      fprintf (dump_file, "\n");
    }
}
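/* For example, given an address like &a[i + 1] with 4-byte array elements,
   strip_offset_1 below returns &a[i] and stores the constant offset 4 (the
   stripped element count times the element size) in *OFFSET.  The numbers
   are only illustrative.  */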
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                poly_int64 *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  poly_int64 off0, off1;
  HOST_WIDE_INT st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (integer_zerop (op1))
        expr = op0;
      else if (integer_zerop (op0))
        {
          if (code == MINUS_EXPR)
            expr = fold_build1 (NEGATE_EXPR, type, op1);
          else
            expr = op1;
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
        return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
        expr = op0;
      else
        expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && integer_zerop (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      {
        tree field;

        if (!inside_addr)
          return orig_expr;

        tmp = component_ref_field_offset (expr);
        field = TREE_OPERAND (expr, 1);
        if (top_compref
            && cst_and_fits_in_hwi (tmp)
            && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
          {
            HOST_WIDE_INT boffset, abs_off;

            /* Strip the component reference completely.  */
            op0 = TREE_OPERAND (expr, 0);
            op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
            boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
            abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
            if (boffset < 0)
              abs_off = -abs_off;

            *offset = off0 + int_cst_value (tmp) + abs_off;
            return op0;
          }
      }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ???  Offset operand?  */
      inside_addr = false;
      break;

    default:
      if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
        return build_int_cst (orig_type, 0);
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, poly_uint64_pod *offset)
{
  poly_int64 off;
  tree core = strip_offset_1 (expr, false, false, &off);
  *offset = off;
  return core;
}
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
/* Private data for walk_tree.  */

struct walk_tree_data
{
  bitmap *inv_vars;
  struct ivopts_data *idata;
};
/* Callback function for walk_tree, it records invariants and symbol
   reference in *EXPR_P.  DATA is the structure storing result info.  */

static tree
find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  tree op = *expr_p;
  struct version_info *info;
  struct walk_tree_data *wdata = (struct walk_tree_data *) data;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL_TREE;

  info = name_info (wdata->idata, op);
  /* Because we expand simple operations when finding IVs, loop invariant
     variable that isn't referred by the original loop could be used now.
     Record such invariant variables here.  */
  if (!info->iv)
    {
      struct ivopts_data *idata = wdata->idata;
      basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));

      if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
        {
          set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
          record_invariant (idata, op, false);
        }
    }
  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*wdata->inv_vars)
    *wdata->inv_vars = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*wdata->inv_vars, info->inv_id);

  return NULL_TREE;
}
/* Records invariants in *EXPR_P.  INV_VARS is the bitmap in which we should
   store them.  */

static void
find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
{
  struct walk_tree_data wdata;

  if (!inv_vars)
    return;

  wdata.idata = data;
  wdata.inv_vars = inv_vars;
  walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
}
/* Get entry from invariant expr hash table for INV_EXPR.  New entry
   will be recorded if it doesn't exist yet.  Given below two exprs:
     inv_expr + cst1, inv_expr + cst2
   It's hard to make decision whether constant part should be stripped
   or not.  We choose to not strip based on below facts:
     1) We need to count ADD cost for constant part if it's stripped,
        which isn't always trivial where this function is called.
     2) Stripping constant away may conflict with the following loop
        invariant hoisting pass.
     3) Not stripping constant away results in more invariant exprs,
        which usually leads to decision preferring lower reg pressure.  */

static iv_inv_expr_ent *
get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
{
  STRIP_NOPS (inv_expr);

  if (poly_int_tree_p (inv_expr)
      || TREE_CODE (inv_expr) == SSA_NAME)
    return NULL;

  /* Don't strip constant part away as we used to.  */

  /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
  struct iv_inv_expr_ent ent;
  ent.expr = inv_expr;
  ent.hash = iterative_hash_expr (inv_expr, 0);
  struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);

  if (!*slot)
    {
      *slot = XNEW (struct iv_inv_expr_ent);
      (*slot)->expr = inv_expr;
      (*slot)->hash = ent.hash;
      (*slot)->id = ++data->max_inv_expr_id;
    }

  return *slot;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
                 tree base, tree step, bool important, enum iv_position pos,
                 struct iv_use *use, gimple *incremented_at,
                 struct iv *orig_iv = NULL)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  gcc_assert (base && step);

  /* -fkeep-gc-roots-live means that we have to keep a real pointer
     live, but the ivopts code may replace a real pointer with one
     pointing before or after the memory block that is then adjusted
     into the memory block during the loop.  FIXME: It would likely be
     better to actually force the pointer live and still use ivopts;
     for example, it would be enough to write the pointer into memory
     and keep it there until after the loop.  */
  if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL;

  /* For non-original variables, make sure their values are computed in a type
     that does not invoke undefined behavior on overflows (since in general,
     we cannot prove that these induction variables are non-wrapping).  */
  if (pos != IP_ORIGINAL)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
        {
          base = fold_convert (type, base);
          step = fold_convert (type, step);
        }
    }

  for (i = 0; i < data->vcands.length (); i++)
    {
      cand = data->vcands[i];

      if (cand->pos != pos)
        continue;

      if (cand->incremented_at != incremented_at
          || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
              && cand->ainc_use != use))
        continue;

      if (operand_equal_p (base, cand->iv->base, 0)
          && operand_equal_p (step, cand->iv->step, 0)
          && (TYPE_PRECISION (TREE_TYPE (base))
              == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
        break;
    }

  if (i == data->vcands.length ())
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;
      cand->iv = alloc_iv (data, base, step);
      cand->pos = pos;
      if (pos != IP_ORIGINAL)
        {
          cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
          cand->var_after = cand->var_before;
        }
      cand->important = important;
      cand->incremented_at = incremented_at;
      data->vcands.safe_push (cand);

      if (!poly_int_tree_p (step))
        {
          find_inv_vars (data, &step, &cand->inv_vars);

          iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
          /* Share bitmap between inv_vars and inv_exprs for cand.  */
          if (inv_expr != NULL)
            {
              cand->inv_exprs = cand->inv_vars;
              cand->inv_vars = NULL;
              if (cand->inv_exprs)
                bitmap_clear (cand->inv_exprs);
              else
                cand->inv_exprs = BITMAP_ALLOC (NULL);

              bitmap_set_bit (cand->inv_exprs, inv_expr->id);
            }
        }

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
        cand->ainc_use = use;
      else
        cand->ainc_use = NULL;

      cand->orig_iv = orig_iv;
      if (dump_file && (dump_flags & TDF_DETAILS))
        dump_cand (dump_file, cand);
    }

  cand->important |= important;

  /* Relate candidate to the group for which it is added.  */
  if (use)
    bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);

  return cand;
}
/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to scheduler.  So we allow IP_END only if IP_NORMAL is not
   available (so we do not have a better alternative), or if the latch edge
   is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}
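/* For instance, a 4-byte memory access whose address iv steps by 4 (or by
   -4) per iteration matches the target's post-increment (or post-decrement)
   addressing mode; the candidates added below make that choice visible to
   the cost model.  The concrete size is only illustrative.  */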
/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
   Important field is set to IMPORTANT.  */

static void
add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
                        bool important, struct iv_use *use)
{
  basic_block use_bb = gimple_bb (use->stmt);
  machine_mode mem_mode;
  unsigned HOST_WIDE_INT cstepi;

  /* If we insert the increment in any position other than the standard
     ones, we must ensure that it is incremented once per iteration.
     It must not be in an inner nested loop, or one side of an if
     statement.  */
  if (use_bb->loop_father != data->current_loop
      || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
      || stmt_can_throw_internal (cfun, use->stmt)
      || !cst_and_fits_in_hwi (step))
    return;

  cstepi = int_cst_value (step);

  mem_mode = TYPE_MODE (use->mem_type);
  if (((USE_LOAD_PRE_INCREMENT (mem_mode)
        || USE_STORE_PRE_INCREMENT (mem_mode))
       && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
      || ((USE_LOAD_PRE_DECREMENT (mem_mode)
           || USE_STORE_PRE_DECREMENT (mem_mode))
          && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
    {
      enum tree_code code = MINUS_EXPR;
      tree new_base;
      tree new_step = step;

      if (POINTER_TYPE_P (TREE_TYPE (base)))
        {
          new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
          code = POINTER_PLUS_EXPR;
        }
      else
        new_step = fold_convert (TREE_TYPE (base), new_step);
      new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
      add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
                       use->stmt);
    }
  if (((USE_LOAD_POST_INCREMENT (mem_mode)
        || USE_STORE_POST_INCREMENT (mem_mode))
       && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
      || ((USE_LOAD_POST_DECREMENT (mem_mode)
           || USE_STORE_POST_DECREMENT (mem_mode))
          && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
    add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
                     use->stmt);
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled before exit condition and at
   the end of loop.  */

static void
add_candidate (struct ivopts_data *data,
               tree base, tree step, bool important, struct iv_use *use,
               struct iv *orig_iv = NULL)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important,
                     IP_NORMAL, use, NULL, orig_iv);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
}
/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_candidate (data, integer_zero_node, integer_one_node, true, NULL);

  /* The same for a double-integer type if it is still fast enough.  */
  if (TYPE_PRECISION
        (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
      && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_integer_type_node, 0),
                   build_int_cst (long_integer_type_node, 1), true, NULL);

  /* The same for a double-integer type if it is still fast enough.  */
  if (TYPE_PRECISION
        (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
      && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
                   build_int_cst (long_long_integer_type_node, 1), true, NULL);
}
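/* As an illustration of the precision checks above: on a typical LP64
   target the long candidate is added (64 bits is wider than 32-bit int and
   still fits in BITS_PER_WORD), while the long long candidate is skipped
   because its precision equals that of long.  This is only an example; the
   outcome depends on the target's type sizes.  */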
/* Adds candidates based on the old induction variable IV.  */

static void
add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
{
  gimple *phi;
  tree def;
  struct iv_cand *cand;

  /* Check if this biv is used in address type use.  */
  if (iv->no_overflow && iv->have_address_use
      && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
      && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
    {
      tree base = fold_convert (sizetype, iv->base);
      tree step = fold_convert (sizetype, iv->step);

      /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
      add_candidate (data, base, step, true, NULL, iv);
      /* Add iv cand of the original type only if it has nonlinear use.  */
      if (iv->nonlin_use)
        add_candidate (data, iv->base, iv->step, true, NULL);
    }
  else
    add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
    add_candidate (data, size_int (0), iv->step, true, NULL);
  else
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
                   iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
         untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      /* Don't add candidate if it's from another PHI node because
         it's an affine iv appearing in the form of PEELED_CHREC.  */
      phi = SSA_NAME_DEF_STMT (def);
      if (gimple_code (phi) != GIMPLE_PHI)
        {
          cand = add_candidate_1 (data,
                                  iv->base, iv->step, true, IP_ORIGINAL, NULL,
                                  SSA_NAME_DEF_STMT (def));
          if (cand)
            {
              cand->var_before = iv->ssa_name;
              cand->var_after = def;
            }
        }
      else
        gcc_assert (gimple_bb (phi) == data->current_loop->header);
    }
}
/* Adds candidates based on the old induction variables.  */

static void
add_iv_candidate_for_bivs (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !integer_zerop (iv->step))
        add_iv_candidate_for_biv (data, iv);
    }
}
3324 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3327 record_common_cand (struct ivopts_data
*data
, tree base
,
3328 tree step
, struct iv_use
*use
)
3330 struct iv_common_cand ent
;
3331 struct iv_common_cand
**slot
;
3335 ent
.hash
= iterative_hash_expr (base
, 0);
3336 ent
.hash
= iterative_hash_expr (step
, ent
.hash
);
3338 slot
= data
->iv_common_cand_tab
->find_slot (&ent
, INSERT
);
3341 *slot
= new iv_common_cand ();
3342 (*slot
)->base
= base
;
3343 (*slot
)->step
= step
;
3344 (*slot
)->uses
.create (8);
3345 (*slot
)->hash
= ent
.hash
;
3346 data
->iv_common_cands
.safe_push ((*slot
));
3349 gcc_assert (use
!= NULL
);
3350 (*slot
)->uses
.safe_push (use
);
3354 /* Comparison function used to sort common candidates. */
3357 common_cand_cmp (const void *p1
, const void *p2
)
3360 const struct iv_common_cand
*const *const ccand1
3361 = (const struct iv_common_cand
*const *)p1
;
3362 const struct iv_common_cand
*const *const ccand2
3363 = (const struct iv_common_cand
*const *)p2
;
3365 n1
= (*ccand1
)->uses
.length ();
3366 n2
= (*ccand2
)->uses
.length ();
/* Adds IV candidates based on common candidates recorded.  */
3373 add_iv_candidate_derived_from_uses (struct ivopts_data
*data
)
3376 struct iv_cand
*cand_1
, *cand_2
;
3378 data
->iv_common_cands
.qsort (common_cand_cmp
);
3379 for (i
= 0; i
< data
->iv_common_cands
.length (); i
++)
3381 struct iv_common_cand
*ptr
= data
->iv_common_cands
[i
];
3383 /* Only add IV candidate if it's derived from multiple uses. */
3384 if (ptr
->uses
.length () <= 1)
3389 if (ip_normal_pos (data
->current_loop
))
3390 cand_1
= add_candidate_1 (data
, ptr
->base
, ptr
->step
,
3391 false, IP_NORMAL
, NULL
, NULL
);
3393 if (ip_end_pos (data
->current_loop
)
3394 && allow_ip_end_pos_p (data
->current_loop
))
3395 cand_2
= add_candidate_1 (data
, ptr
->base
, ptr
->step
,
3396 false, IP_END
, NULL
, NULL
);
3398 /* Bind deriving uses and the new candidates. */
3399 for (j
= 0; j
< ptr
->uses
.length (); j
++)
3401 struct iv_group
*group
= data
->vgroups
[ptr
->uses
[j
]->group_id
];
3403 bitmap_set_bit (group
->related_cands
, cand_1
->id
);
3405 bitmap_set_bit (group
->related_cands
, cand_2
->id
);
3409 /* Release data since it is useless from this point. */
3410 data
->iv_common_cand_tab
->empty ();
3411 data
->iv_common_cands
.truncate (0);
3414 /* Adds candidates based on the value of USE's iv. */
3417 add_iv_candidate_for_use (struct ivopts_data
*data
, struct iv_use
*use
)
3422 struct iv
*iv
= use
->iv
;
3424 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
3426 /* Record common candidate for use in case it can be shared by others. */
3427 record_common_cand (data
, iv
->base
, iv
->step
, use
);
3429 /* Record common candidate with initial value zero. */
3430 basetype
= TREE_TYPE (iv
->base
);
3431 if (POINTER_TYPE_P (basetype
))
3432 basetype
= sizetype
;
3433 record_common_cand (data
, build_int_cst (basetype
, 0), iv
->step
, use
);
3435 /* Compare the cost of an address with an unscaled index with the cost of
3436 an address with a scaled index and add candidate if useful. */
3439 && poly_int_tree_p (iv
->step
, &step
)
3440 && address_p (use
->type
))
3442 poly_int64 new_step
;
3443 unsigned int fact
= preferred_mem_scale_factor
3445 TYPE_MODE (use
->mem_type
),
3446 optimize_loop_for_speed_p (data
->current_loop
));
3449 && multiple_p (step
, fact
, &new_step
))
3450 add_candidate (data
, size_int (0),
3451 wide_int_to_tree (sizetype
, new_step
),
3455 /* Record common candidate with constant offset stripped in base.
3456 Like the use itself, we also add candidate directly for it. */
3457 base
= strip_offset (iv
->base
, &offset
);
3458 if (maybe_ne (offset
, 0U) || base
!= iv
->base
)
3460 record_common_cand (data
, base
, iv
->step
, use
);
3461 add_candidate (data
, base
, iv
->step
, false, use
);
3464 /* Record common candidate with base_object removed in base. */
3467 if (iv
->base_object
!= NULL
&& TREE_CODE (base
) == POINTER_PLUS_EXPR
)
3469 tree step
= iv
->step
;
3472 base
= TREE_OPERAND (base
, 1);
3473 step
= fold_convert (sizetype
, step
);
3474 record_common_cand (data
, base
, step
, use
);
3475 /* Also record common candidate with offset stripped. */
3476 base
= strip_offset (base
, &offset
);
3477 if (maybe_ne (offset
, 0U))
3478 record_common_cand (data
, base
, step
, use
);
3481 /* At last, add auto-incremental candidates. Make such variables
3482 important since other iv uses with same base object may be based
3484 if (use
!= NULL
&& address_p (use
->type
))
3485 add_autoinc_candidates (data
, iv
->base
, iv
->step
, true, use
);
3488 /* Adds candidates based on the uses. */
3491 add_iv_candidate_for_groups (struct ivopts_data
*data
)
3495 /* Only add candidate for the first use in group. */
3496 for (i
= 0; i
< data
->vgroups
.length (); i
++)
3498 struct iv_group
*group
= data
->vgroups
[i
];
3500 gcc_assert (group
->vuses
[0] != NULL
);
3501 add_iv_candidate_for_use (data
, group
->vuses
[0]);
3503 add_iv_candidate_derived_from_uses (data
);
3506 /* Record important candidates and add them to related_cands bitmaps. */
3509 record_important_candidates (struct ivopts_data
*data
)
3512 struct iv_group
*group
;
3514 for (i
= 0; i
< data
->vcands
.length (); i
++)
3516 struct iv_cand
*cand
= data
->vcands
[i
];
3518 if (cand
->important
)
3519 bitmap_set_bit (data
->important_candidates
, i
);
3522 data
->consider_all_candidates
= (data
->vcands
.length ()
3523 <= CONSIDER_ALL_CANDIDATES_BOUND
);
3525 /* Add important candidates to groups' related_cands bitmaps. */
3526 for (i
= 0; i
< data
->vgroups
.length (); i
++)
3528 group
= data
->vgroups
[i
];
3529 bitmap_ior_into (group
->related_cands
, data
->important_candidates
);
3533 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3534 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3535 we allocate a simple list to every use. */
3538 alloc_use_cost_map (struct ivopts_data
*data
)
3540 unsigned i
, size
, s
;
3542 for (i
= 0; i
< data
->vgroups
.length (); i
++)
3544 struct iv_group
*group
= data
->vgroups
[i
];
3546 if (data
->consider_all_candidates
)
3547 size
= data
->vcands
.length ();
3550 s
= bitmap_count_bits (group
->related_cands
);
3552 /* Round up to the power of two, so that moduling by it is fast. */
3553 size
= s
? (1 << ceil_log2 (s
)) : 1;
3556 group
->n_map_members
= size
;
3557 group
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
3561 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3562 on invariants INV_VARS and that the value used in expressing it is
3563 VALUE, and in case of iv elimination the comparison operator is COMP. */
3566 set_group_iv_cost (struct ivopts_data
*data
,
3567 struct iv_group
*group
, struct iv_cand
*cand
,
3568 comp_cost cost
, bitmap inv_vars
, tree value
,
3569 enum tree_code comp
, bitmap inv_exprs
)
3573 if (cost
.infinite_cost_p ())
3575 BITMAP_FREE (inv_vars
);
3576 BITMAP_FREE (inv_exprs
);
3580 if (data
->consider_all_candidates
)
3582 group
->cost_map
[cand
->id
].cand
= cand
;
3583 group
->cost_map
[cand
->id
].cost
= cost
;
3584 group
->cost_map
[cand
->id
].inv_vars
= inv_vars
;
3585 group
->cost_map
[cand
->id
].inv_exprs
= inv_exprs
;
3586 group
->cost_map
[cand
->id
].value
= value
;
3587 group
->cost_map
[cand
->id
].comp
= comp
;
3591 /* n_map_members is a power of two, so this computes modulo. */
3592 s
= cand
->id
& (group
->n_map_members
- 1);
3593 for (i
= s
; i
< group
->n_map_members
; i
++)
3594 if (!group
->cost_map
[i
].cand
)
3596 for (i
= 0; i
< s
; i
++)
3597 if (!group
->cost_map
[i
].cand
)
3603 group
->cost_map
[i
].cand
= cand
;
3604 group
->cost_map
[i
].cost
= cost
;
3605 group
->cost_map
[i
].inv_vars
= inv_vars
;
3606 group
->cost_map
[i
].inv_exprs
= inv_exprs
;
3607 group
->cost_map
[i
].value
= value
;
3608 group
->cost_map
[i
].comp
= comp
;
3611 /* Gets cost of (GROUP, CAND) pair. */
3613 static struct cost_pair
*
3614 get_group_iv_cost (struct ivopts_data
*data
, struct iv_group
*group
,
3615 struct iv_cand
*cand
)
3618 struct cost_pair
*ret
;
3623 if (data
->consider_all_candidates
)
3625 ret
= group
->cost_map
+ cand
->id
;
3632 /* n_map_members is a power of two, so this computes modulo. */
3633 s
= cand
->id
& (group
->n_map_members
- 1);
3634 for (i
= s
; i
< group
->n_map_members
; i
++)
3635 if (group
->cost_map
[i
].cand
== cand
)
3636 return group
->cost_map
+ i
;
3637 else if (group
->cost_map
[i
].cand
== NULL
)
3639 for (i
= 0; i
< s
; i
++)
3640 if (group
->cost_map
[i
].cand
== cand
)
3641 return group
->cost_map
+ i
;
3642 else if (group
->cost_map
[i
].cand
== NULL
)
3648 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3650 produce_memory_decl_rtl (tree obj
, int *regno
)
3652 addr_space_t as
= TYPE_ADDR_SPACE (TREE_TYPE (obj
));
3653 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3657 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
3659 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
3660 x
= gen_rtx_SYMBOL_REF (address_mode
, name
);
3661 SET_SYMBOL_REF_DECL (x
, obj
);
3662 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
3663 set_mem_addr_space (x
, as
);
3664 targetm
.encode_section_info (obj
, x
, true);
3668 x
= gen_raw_REG (address_mode
, (*regno
)++);
3669 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
3670 set_mem_addr_space (x
, as
);
3676 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3677 walk_tree. DATA contains the actual fake register number. */
3680 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
3682 tree obj
= NULL_TREE
;
3684 int *regno
= (int *) data
;
3686 switch (TREE_CODE (*expr_p
))
3689 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
3690 handled_component_p (*expr_p
);
3691 expr_p
= &TREE_OPERAND (*expr_p
, 0))
3694 if (DECL_P (obj
) && HAS_RTL_P (obj
) && !DECL_RTL_SET_P (obj
))
3695 x
= produce_memory_decl_rtl (obj
, regno
);
3700 obj
= SSA_NAME_VAR (*expr_p
);
3701 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3704 if (!DECL_RTL_SET_P (obj
))
3705 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
3714 if (DECL_RTL_SET_P (obj
))
3717 if (DECL_MODE (obj
) == BLKmode
)
3718 x
= produce_memory_decl_rtl (obj
, regno
);
3720 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
3730 decl_rtl_to_reset
.safe_push (obj
);
3731 SET_DECL_RTL (obj
, x
);
3737 /* Predict whether the given loop will be transformed in the RTL
3738 doloop_optimize pass. Attempt to duplicate some doloop_optimize checks.
3739 This is only for target independent checks, see targetm.predict_doloop_p
3740 for the target dependent ones.
   Note that according to some initial investigation, some checks like the
   costly niter check and invalid stmt scanning don't bring much gain for
   general cases, so keep this as simple as possible for now.

   Some RTL-specific checks seem impossible to do at the GIMPLE level; if any
   new checks or easy checks _are_ missing here, please add them.  */
3749 static bool ATTRIBUTE_UNUSED
3750 generic_predict_doloop_p (struct ivopts_data
*data
)
3752 struct loop
*loop
= data
->current_loop
;
3754 /* Call target hook for target dependent checks. */
3755 if (!targetm
.predict_doloop_p (loop
))
3757 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3758 fprintf (dump_file
, "Predict doloop failure due to"
3759 " target specific checks.\n");
3763 /* Similar to doloop_optimize, check iteration description to know it's
3764 suitable or not. Keep it as simple as possible, feel free to extend it
3765 if you find any multiple exits cases matter. */
3766 edge exit
= single_dom_exit (loop
);
3767 struct tree_niter_desc
*niter_desc
;
3768 if (!exit
|| !(niter_desc
= niter_for_exit (data
, exit
)))
3770 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3771 fprintf (dump_file
, "Predict doloop failure due to"
3772 " unexpected niters.\n");
  /* Similar to doloop_optimize, check whether the iteration count is too
     small to be profitable.  */
3778 HOST_WIDE_INT est_niter
= get_estimated_loop_iterations_int (loop
);
3779 if (est_niter
== -1)
3780 est_niter
= get_likely_max_loop_iterations_int (loop
);
3781 if (est_niter
>= 0 && est_niter
< 3)
3783 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3785 "Predict doloop failure due to"
3786 " too few iterations (%u).\n",
3787 (unsigned int) est_niter
);
3794 /* Determines cost of the computation of EXPR. */
3797 computation_cost (tree expr
, bool speed
)
3801 tree type
= TREE_TYPE (expr
);
3803 /* Avoid using hard regs in ways which may be unsupported. */
3804 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
3805 struct cgraph_node
*node
= cgraph_node::get (current_function_decl
);
3806 enum node_frequency real_frequency
= node
->frequency
;
3808 node
->frequency
= NODE_FREQUENCY_NORMAL
;
3809 crtl
->maybe_hot_insn_p
= speed
;
3810 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
3812 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
3815 default_rtl_profile ();
3816 node
->frequency
= real_frequency
;
3818 cost
= seq_cost (seq
, speed
);
3820 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
),
3821 TYPE_ADDR_SPACE (type
), speed
);
3822 else if (!REG_P (rslt
))
3823 cost
+= set_src_cost (rslt
, TYPE_MODE (type
), speed
);
3828 /* Returns variable containing the value of candidate CAND at statement AT. */
3831 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, gimple
*stmt
)
3833 if (stmt_after_increment (loop
, cand
, stmt
))
3834 return cand
->var_after
;
3836 return cand
->var_before
;
3839 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3840 same precision that is at least as wide as the precision of TYPE, stores
3841 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3845 determine_common_wider_type (tree
*a
, tree
*b
)
3847 tree wider_type
= NULL
;
3849 tree atype
= TREE_TYPE (*a
);
3851 if (CONVERT_EXPR_P (*a
))
3853 suba
= TREE_OPERAND (*a
, 0);
3854 wider_type
= TREE_TYPE (suba
);
3855 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
3861 if (CONVERT_EXPR_P (*b
))
3863 subb
= TREE_OPERAND (*b
, 0);
3864 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
3875 /* Determines the expression by that USE is expressed from induction variable
3876 CAND at statement AT in LOOP. The expression is stored in two parts in a
3877 decomposed form. The invariant part is stored in AFF_INV; while variant
3878 part in AFF_VAR. Store ratio of CAND.step over USE.step in PRAT if it's
3879 non-null. Returns false if USE cannot be expressed using CAND. */
3882 get_computation_aff_1 (struct loop
*loop
, gimple
*at
, struct iv_use
*use
,
3883 struct iv_cand
*cand
, struct aff_tree
*aff_inv
,
3884 struct aff_tree
*aff_var
, widest_int
*prat
= NULL
)
3886 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
3887 tree cbase
= cand
->iv
->base
, cstep
= cand
->iv
->step
;
3888 tree common_type
, uutype
, var
, cstep_common
;
3889 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
3893 /* We must have a precision to express the values of use. */
3894 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3897 var
= var_at_stmt (loop
, cand
, at
);
3898 uutype
= unsigned_type_for (utype
);
3900 /* If the conversion is not noop, perform it. */
3901 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
3903 if (cand
->orig_iv
!= NULL
&& CONVERT_EXPR_P (cbase
)
3904 && (CONVERT_EXPR_P (cstep
) || poly_int_tree_p (cstep
)))
3906 tree inner_base
, inner_step
, inner_type
;
3907 inner_base
= TREE_OPERAND (cbase
, 0);
3908 if (CONVERT_EXPR_P (cstep
))
3909 inner_step
= TREE_OPERAND (cstep
, 0);
3913 inner_type
= TREE_TYPE (inner_base
);
          /* If candidate is added from a biv whose type is smaller than
             ctype, we know both candidate and the biv won't overflow.
             In this case, it's safe to skip the conversion in candidate.
             As an example, (unsigned short)((unsigned long)A) equals
             (unsigned short)A, if A has a type no larger than short.  */
3919 if (TYPE_PRECISION (inner_type
) <= TYPE_PRECISION (uutype
))
3925 cbase
= fold_convert (uutype
, cbase
);
3926 cstep
= fold_convert (uutype
, cstep
);
3927 var
= fold_convert (uutype
, var
);
3930 /* Ratio is 1 when computing the value of biv cand by itself.
3931 We can't rely on constant_multiple_of in this case because the
3932 use is created after the original biv is selected. The call
3933 could fail because of inconsistent fold behavior. See PR68021
3934 for more information. */
3935 if (cand
->pos
== IP_ORIGINAL
&& cand
->incremented_at
== use
->stmt
)
3937 gcc_assert (is_gimple_assign (use
->stmt
));
3938 gcc_assert (use
->iv
->ssa_name
== cand
->var_after
);
3939 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
3942 else if (!constant_multiple_of (ustep
, cstep
, &rat
))
  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry
     about overflows, as all the arithmetic will in the end be performed in
     UUTYPE anyway.  */
3953 common_type
= determine_common_wider_type (&ubase
, &cbase
);
3955 /* use = ubase - ratio * cbase + ratio * var. */
3956 tree_to_aff_combination (ubase
, common_type
, aff_inv
);
3957 tree_to_aff_combination (cbase
, common_type
, &aff_cbase
);
3958 tree_to_aff_combination (var
, uutype
, aff_var
);
3960 /* We need to shift the value if we are after the increment. */
3961 if (stmt_after_increment (loop
, cand
, at
))
3965 if (common_type
!= uutype
)
3966 cstep_common
= fold_convert (common_type
, cstep
);
3968 cstep_common
= cstep
;
3970 tree_to_aff_combination (cstep_common
, common_type
, &cstep_aff
);
3971 aff_combination_add (&aff_cbase
, &cstep_aff
);
3974 aff_combination_scale (&aff_cbase
, -rat
);
3975 aff_combination_add (aff_inv
, &aff_cbase
);
3976 if (common_type
!= uutype
)
3977 aff_combination_convert (aff_inv
, uutype
);
3979 aff_combination_scale (aff_var
, rat
);
3983 /* Determines the expression by that USE is expressed from induction variable
3984 CAND at statement AT in LOOP. The expression is stored in a decomposed
3985 form into AFF. Returns false if USE cannot be expressed using CAND. */
3988 get_computation_aff (struct loop
*loop
, gimple
*at
, struct iv_use
*use
,
3989 struct iv_cand
*cand
, struct aff_tree
*aff
)
3993 if (!get_computation_aff_1 (loop
, at
, use
, cand
, aff
, &aff_var
))
3996 aff_combination_add (aff
, &aff_var
);
4000 /* Return the type of USE. */
4003 get_use_type (struct iv_use
*use
)
4005 tree base_type
= TREE_TYPE (use
->iv
->base
);
4008 if (use
->type
== USE_REF_ADDRESS
)
4010 /* The base_type may be a void pointer. Create a pointer type based on
4011 the mem_ref instead. */
4012 type
= build_pointer_type (TREE_TYPE (*use
->op_p
));
4013 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type
))
4014 == TYPE_ADDR_SPACE (TREE_TYPE (base_type
)));
4022 /* Determines the expression by that USE is expressed from induction variable
4023 CAND at statement AT in LOOP. The computation is unshared. */
4026 get_computation_at (struct loop
*loop
, gimple
*at
,
4027 struct iv_use
*use
, struct iv_cand
*cand
)
4030 tree type
= get_use_type (use
);
4032 if (!get_computation_aff (loop
, at
, use
, cand
, &aff
))
4034 unshare_aff_combination (&aff
);
4035 return fold_convert (type
, aff_combination_to_tree (&aff
));
4038 /* Adjust the cost COST for being in loop setup rather than loop body.
4039 If we're optimizing for space, the loop setup overhead is constant;
4040 if we're optimizing for speed, amortize it over the per-iteration cost.
4041 If ROUND_UP_P is true, the result is round up rather than to zero when
4042 optimizing for speed. */
4044 adjust_setup_cost (struct ivopts_data
*data
, int64_t cost
,
4045 bool round_up_p
= false)
4049 else if (optimize_loop_for_speed_p (data
->current_loop
))
4051 int64_t niters
= (int64_t) avg_loop_niter (data
->current_loop
);
4052 return (cost
+ (round_up_p
? niters
- 1 : 0)) / niters
;
4058 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4059 EXPR operand holding the shift. COST0 and COST1 are the costs for
4060 calculating the operands of EXPR. Returns true if successful, and returns
4061 the cost in COST. */
4064 get_shiftadd_cost (tree expr
, scalar_int_mode mode
, comp_cost cost0
,
4065 comp_cost cost1
, tree mult
, bool speed
, comp_cost
*cost
)
4068 tree op1
= TREE_OPERAND (expr
, 1);
4069 tree cst
= TREE_OPERAND (mult
, 1);
4070 tree multop
= TREE_OPERAND (mult
, 0);
4071 int m
= exact_log2 (int_cst_value (cst
));
4072 int maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (mode
));
4073 int as_cost
, sa_cost
;
4076 if (!(m
>= 0 && m
< maxm
))
4080 mult_in_op1
= operand_equal_p (op1
, mult
, 0);
4082 as_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
4084 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4085 use that in preference to a shift insn followed by an add insn. */
4086 sa_cost
= (TREE_CODE (expr
) != MINUS_EXPR
4087 ? shiftadd_cost (speed
, mode
, m
)
4089 ? shiftsub1_cost (speed
, mode
, m
)
4090 : shiftsub0_cost (speed
, mode
, m
)));
4092 res
= comp_cost (MIN (as_cost
, sa_cost
), 0);
4093 res
+= (mult_in_op1
? cost0
: cost1
);
4095 STRIP_NOPS (multop
);
4096 if (!is_gimple_val (multop
))
4097 res
+= force_expr_to_var_cost (multop
, speed
);
4103 /* Estimates cost of forcing expression EXPR into a variable. */
4106 force_expr_to_var_cost (tree expr
, bool speed
)
4108 static bool costs_initialized
= false;
4109 static unsigned integer_cost
[2];
4110 static unsigned symbol_cost
[2];
4111 static unsigned address_cost
[2];
4113 comp_cost cost0
, cost1
, cost
;
4115 scalar_int_mode int_mode
;
4117 if (!costs_initialized
)
4119 tree type
= build_pointer_type (integer_type_node
);
4124 var
= create_tmp_var_raw (integer_type_node
, "test_var");
4125 TREE_STATIC (var
) = 1;
4126 x
= produce_memory_decl_rtl (var
, NULL
);
4127 SET_DECL_RTL (var
, x
);
4129 addr
= build1 (ADDR_EXPR
, type
, var
);
4132 for (i
= 0; i
< 2; i
++)
4134 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
4137 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
4140 = computation_cost (fold_build_pointer_plus_hwi (addr
, 2000), i
) + 1;
4141 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4143 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
4144 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
4145 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
4146 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
4147 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
4148 fprintf (dump_file
, "\n");
4152 costs_initialized
= true;
4157 if (SSA_VAR_P (expr
))
4160 if (is_gimple_min_invariant (expr
))
4162 if (poly_int_tree_p (expr
))
4163 return comp_cost (integer_cost
[speed
], 0);
4165 if (TREE_CODE (expr
) == ADDR_EXPR
)
4167 tree obj
= TREE_OPERAND (expr
, 0);
4170 || TREE_CODE (obj
) == PARM_DECL
4171 || TREE_CODE (obj
) == RESULT_DECL
)
4172 return comp_cost (symbol_cost
[speed
], 0);
4175 return comp_cost (address_cost
[speed
], 0);
4178 switch (TREE_CODE (expr
))
4180 case POINTER_PLUS_EXPR
:
4184 case TRUNC_DIV_EXPR
:
4189 op0
= TREE_OPERAND (expr
, 0);
4190 op1
= TREE_OPERAND (expr
, 1);
4198 op0
= TREE_OPERAND (expr
, 0);
4204 /* Just an arbitrary value, FIXME. */
4205 return comp_cost (target_spill_cost
[speed
], 0);
4208 if (op0
== NULL_TREE
4209 || TREE_CODE (op0
) == SSA_NAME
|| CONSTANT_CLASS_P (op0
))
4212 cost0
= force_expr_to_var_cost (op0
, speed
);
4214 if (op1
== NULL_TREE
4215 || TREE_CODE (op1
) == SSA_NAME
|| CONSTANT_CLASS_P (op1
))
4218 cost1
= force_expr_to_var_cost (op1
, speed
);
4220 mode
= TYPE_MODE (TREE_TYPE (expr
));
4221 switch (TREE_CODE (expr
))
4223 case POINTER_PLUS_EXPR
:
4227 cost
= comp_cost (add_cost (speed
, mode
), 0);
4228 if (TREE_CODE (expr
) != NEGATE_EXPR
)
4230 tree mult
= NULL_TREE
;
4232 if (TREE_CODE (op1
) == MULT_EXPR
)
4234 else if (TREE_CODE (op0
) == MULT_EXPR
)
4237 if (mult
!= NULL_TREE
4238 && is_a
<scalar_int_mode
> (mode
, &int_mode
)
4239 && cst_and_fits_in_hwi (TREE_OPERAND (mult
, 1))
4240 && get_shiftadd_cost (expr
, int_mode
, cost0
, cost1
, mult
,
4248 tree inner_mode
, outer_mode
;
4249 outer_mode
= TREE_TYPE (expr
);
4250 inner_mode
= TREE_TYPE (op0
);
4251 cost
= comp_cost (convert_cost (TYPE_MODE (outer_mode
),
4252 TYPE_MODE (inner_mode
), speed
), 0);
4257 if (cst_and_fits_in_hwi (op0
))
4258 cost
= comp_cost (mult_by_coeff_cost (int_cst_value (op0
),
4260 else if (cst_and_fits_in_hwi (op1
))
4261 cost
= comp_cost (mult_by_coeff_cost (int_cst_value (op1
),
4264 return comp_cost (target_spill_cost
[speed
], 0);
4267 case TRUNC_DIV_EXPR
:
4268 /* Division by power of two is usually cheap, so we allow it. Forbid
4270 if (integer_pow2p (TREE_OPERAND (expr
, 1)))
4271 cost
= comp_cost (add_cost (speed
, mode
), 0);
4273 cost
= comp_cost (target_spill_cost
[speed
], 0);
4281 cost
= comp_cost (add_cost (speed
, mode
), 0);
4293 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4294 invariants the computation depends on. */
4297 force_var_cost (struct ivopts_data
*data
, tree expr
, bitmap
*inv_vars
)
4302 find_inv_vars (data
, &expr
, inv_vars
);
4303 return force_expr_to_var_cost (expr
, data
->speed
);
4306 /* Returns cost of auto-modifying address expression in shape base + offset.
4307 AINC_STEP is step size of the address IV. AINC_OFFSET is offset of the
4308 address expression. The address expression has ADDR_MODE in addr space
4309 AS. The memory access has MEM_MODE. SPEED means we are optimizing for
4314 AINC_PRE_INC
, /* Pre increment. */
4315 AINC_PRE_DEC
, /* Pre decrement. */
4316 AINC_POST_INC
, /* Post increment. */
4317 AINC_POST_DEC
, /* Post decrement. */
4318 AINC_NONE
/* Also the number of auto increment types. */
4321 struct ainc_cost_data
4323 int64_t costs
[AINC_NONE
];
4327 get_address_cost_ainc (poly_int64 ainc_step
, poly_int64 ainc_offset
,
4328 machine_mode addr_mode
, machine_mode mem_mode
,
4329 addr_space_t as
, bool speed
)
4331 if (!USE_LOAD_PRE_DECREMENT (mem_mode
)
4332 && !USE_STORE_PRE_DECREMENT (mem_mode
)
4333 && !USE_LOAD_POST_DECREMENT (mem_mode
)
4334 && !USE_STORE_POST_DECREMENT (mem_mode
)
4335 && !USE_LOAD_PRE_INCREMENT (mem_mode
)
4336 && !USE_STORE_PRE_INCREMENT (mem_mode
)
4337 && !USE_LOAD_POST_INCREMENT (mem_mode
)
4338 && !USE_STORE_POST_INCREMENT (mem_mode
))
4339 return infinite_cost
;
4341 static vec
<ainc_cost_data
*> ainc_cost_data_list
;
4342 unsigned idx
= (unsigned) as
* MAX_MACHINE_MODE
+ (unsigned) mem_mode
;
4343 if (idx
>= ainc_cost_data_list
.length ())
4345 unsigned nsize
= ((unsigned) as
+ 1) *MAX_MACHINE_MODE
;
4347 gcc_assert (nsize
> idx
);
4348 ainc_cost_data_list
.safe_grow_cleared (nsize
);
4351 ainc_cost_data
*data
= ainc_cost_data_list
[idx
];
4354 rtx reg
= gen_raw_REG (addr_mode
, LAST_VIRTUAL_REGISTER
+ 1);
4356 data
= (ainc_cost_data
*) xcalloc (1, sizeof (*data
));
4357 data
->costs
[AINC_PRE_DEC
] = INFTY
;
4358 data
->costs
[AINC_POST_DEC
] = INFTY
;
4359 data
->costs
[AINC_PRE_INC
] = INFTY
;
4360 data
->costs
[AINC_POST_INC
] = INFTY
;
4361 if (USE_LOAD_PRE_DECREMENT (mem_mode
)
4362 || USE_STORE_PRE_DECREMENT (mem_mode
))
4364 rtx addr
= gen_rtx_PRE_DEC (addr_mode
, reg
);
4366 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
4367 data
->costs
[AINC_PRE_DEC
]
4368 = address_cost (addr
, mem_mode
, as
, speed
);
4370 if (USE_LOAD_POST_DECREMENT (mem_mode
)
4371 || USE_STORE_POST_DECREMENT (mem_mode
))
4373 rtx addr
= gen_rtx_POST_DEC (addr_mode
, reg
);
4375 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
4376 data
->costs
[AINC_POST_DEC
]
4377 = address_cost (addr
, mem_mode
, as
, speed
);
4379 if (USE_LOAD_PRE_INCREMENT (mem_mode
)
4380 || USE_STORE_PRE_INCREMENT (mem_mode
))
4382 rtx addr
= gen_rtx_PRE_INC (addr_mode
, reg
);
4384 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
4385 data
->costs
[AINC_PRE_INC
]
4386 = address_cost (addr
, mem_mode
, as
, speed
);
4388 if (USE_LOAD_POST_INCREMENT (mem_mode
)
4389 || USE_STORE_POST_INCREMENT (mem_mode
))
4391 rtx addr
= gen_rtx_POST_INC (addr_mode
, reg
);
4393 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
4394 data
->costs
[AINC_POST_INC
]
4395 = address_cost (addr
, mem_mode
, as
, speed
);
4397 ainc_cost_data_list
[idx
] = data
;
4400 poly_int64 msize
= GET_MODE_SIZE (mem_mode
);
4401 if (known_eq (ainc_offset
, 0) && known_eq (msize
, ainc_step
))
4402 return comp_cost (data
->costs
[AINC_POST_INC
], 0);
4403 if (known_eq (ainc_offset
, 0) && known_eq (msize
, -ainc_step
))
4404 return comp_cost (data
->costs
[AINC_POST_DEC
], 0);
4405 if (known_eq (ainc_offset
, msize
) && known_eq (msize
, ainc_step
))
4406 return comp_cost (data
->costs
[AINC_PRE_INC
], 0);
4407 if (known_eq (ainc_offset
, -msize
) && known_eq (msize
, -ainc_step
))
4408 return comp_cost (data
->costs
[AINC_PRE_DEC
], 0);
4410 return infinite_cost
;
4413 /* Return cost of computing USE's address expression by using CAND.
4414 AFF_INV and AFF_VAR represent invariant and variant parts of the
4415 address expression, respectively. If AFF_INV is simple, store
4416 the loop invariant variables which are depended by it in INV_VARS;
4417 if AFF_INV is complicated, handle it as a new invariant expression
4418 and record it in INV_EXPR. RATIO indicates multiple times between
   steps of USE and CAND.  If CAN_AUTOINC is non-NULL, store boolean
4420 value to it indicating if this is an auto-increment address. */
static comp_cost
get_address_cost (struct ivopts_data *data, struct iv_use *use,
		  struct iv_cand *cand, aff_tree *aff_inv,
		  aff_tree *aff_var, HOST_WIDE_INT ratio,
		  bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
		  bool *can_autoinc, bool speed)
{
  rtx addr;
  bool simple_inv = true;
  tree comp_inv = NULL_TREE, type = aff_var->type;
  comp_cost var_cost = no_cost, cost = no_cost;
  struct mem_address parts = {NULL_TREE, integer_one_node,
			      NULL_TREE, NULL_TREE, NULL_TREE};
  machine_mode addr_mode = TYPE_MODE (type);
  machine_mode mem_mode = TYPE_MODE (use->mem_type);
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
  /* Only true if ratio != 1.  */
  bool ok_with_ratio_p = false;
  bool ok_without_ratio_p = false;

  if (!aff_combination_const_p (aff_inv))
    {
      parts.index = integer_one_node;
      /* Addressing mode "base + index".  */
      ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
      if (ratio != 1)
	{
	  parts.step = wide_int_to_tree (type, ratio);
	  /* Addressing mode "base + index << scale".  */
	  ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
	  if (!ok_with_ratio_p)
	    parts.step = NULL_TREE;
	}
      if (ok_with_ratio_p || ok_without_ratio_p)
	{
	  if (maybe_ne (aff_inv->offset, 0))
	    {
	      parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
	      /* Addressing mode "base + index [<< scale] + offset".  */
	      if (!valid_mem_ref_p (mem_mode, as, &parts))
		parts.offset = NULL_TREE;
	      else
		aff_inv->offset = 0;
	    }

	  move_fixed_address_to_symbol (&parts, aff_inv);
	  /* Base is fixed address and is moved to symbol part.  */
	  if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
	    parts.base = NULL_TREE;

	  /* Addressing mode "symbol + base + index [<< scale] [+ offset]".  */
	  if (parts.symbol != NULL_TREE
	      && !valid_mem_ref_p (mem_mode, as, &parts))
	    {
	      aff_combination_add_elt (aff_inv, parts.symbol, 1);
	      parts.symbol = NULL_TREE;
	      /* Reset SIMPLE_INV since symbol address needs to be computed
		 outside of address expression in this case.  */
	      simple_inv = false;
	      /* Symbol part is moved back to base part, it can't be NULL.  */
	      parts.base = integer_one_node;
	    }
	}
      else
	parts.index = NULL_TREE;
    }
  else
    {
      poly_int64 ainc_step;

      if (can_autoinc
	  && ptrdiff_tree_p (cand->iv->step, &ainc_step))
	{
	  poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();

	  if (stmt_after_increment (data->current_loop, cand, use->stmt))
	    ainc_offset += ainc_step;
	  cost = get_address_cost_ainc (ainc_step, ainc_offset,
					addr_mode, mem_mode, as, speed);
	  if (!cost.infinite_cost_p ())
	    {
	      *can_autoinc = true;
	      return cost;
	    }
	  cost = no_cost;
	}
      if (!aff_combination_zero_p (aff_inv))
	{
	  parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
	  /* Addressing mode "base + offset".  */
	  if (!valid_mem_ref_p (mem_mode, as, &parts))
	    parts.offset = NULL_TREE;
	  else
	    aff_inv->offset = 0;
	}
    }

  if (simple_inv)
    simple_inv = (aff_inv == NULL
		  || aff_combination_const_p (aff_inv)
		  || aff_combination_singleton_var_p (aff_inv));
  if (!aff_combination_zero_p (aff_inv))
    comp_inv = aff_combination_to_tree (aff_inv);
  if (comp_inv != NULL_TREE)
    cost = force_var_cost (data, comp_inv, inv_vars);
  if (ratio != 1 && parts.step == NULL_TREE)
    var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
  if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
    var_cost += add_cost (speed, addr_mode);

  if (comp_inv && inv_expr && !simple_inv)
    {
      *inv_expr = get_loop_invariant_expr (data, comp_inv);
      /* Clear depends on.  */
      if (*inv_expr != NULL && inv_vars && *inv_vars)
	bitmap_clear (*inv_vars);
    }

  /* Cost of small invariant expression adjusted against loop niters
     is usually zero, which makes it difficult to be differentiated
     from candidate based on loop invariant variables.  Secondly, the
     generated invariant expression may not be hoisted out of loop by
     following pass.  We penalize the cost by rounding up in order to
     neutralize such effects.  */
  cost.cost = adjust_setup_cost (data, cost.cost, true);
  cost.scratch = cost.cost;

  cost += var_cost;
  addr = addr_for_mem_ref (&parts, as, false);
  gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
  cost += address_cost (addr, mem_mode, as, speed);

  if (parts.symbol != NULL_TREE)
    cost.complexity += 1;
  /* Don't increase the complexity of adding a scaled index if it's
     the only kind of index that the target allows.  */
  if (parts.step != NULL_TREE && ok_without_ratio_p)
    cost.complexity += 1;
  if (parts.base != NULL_TREE && parts.index != NULL_TREE)
    cost.complexity += 1;
  if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
    cost.complexity += 1;

  return cost;
}
/* Scale (multiply) the computed COST (except the scratch part, which is
   expected to be hoisted out of the loop) by header->frequency / AT->frequency,
   which makes the expected cost more accurate.  */

static comp_cost
get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
{
  if (data->speed
      && data->current_loop->header->count.to_frequency (cfun) > 0)
    {
      basic_block bb = gimple_bb (at);
      gcc_assert (cost.scratch <= cost.cost);
      int scale_factor = (int)(intptr_t) bb->aux;
      if (scale_factor == 1)
	return cost;

      int64_t scaled_cost
	= cost.scratch + (cost.cost - cost.scratch) * scale_factor;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
		 "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
		 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);

      cost.cost = scaled_cost;
    }

  return cost;
}
/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into a
   register.  A set of invariants we depend on is stored in INV_VARS.
   If CAN_AUTOINC is nonnull, use it to record whether autoinc
   addressing is likely.  If INV_EXPR is nonnull, record the invariant
   expr entry in it.  */
static comp_cost
get_computation_cost (struct ivopts_data *data, struct iv_use *use,
		      struct iv_cand *cand, bool address_p, bitmap *inv_vars,
		      bool *can_autoinc, iv_inv_expr_ent **inv_expr)
{
  gimple *at = use->stmt;
  tree ubase = use->iv->base, cbase = cand->iv->base;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree comp_inv = NULL_TREE;
  HOST_WIDE_INT ratio, aratio;
  comp_cost cost;
  widest_int rat;
  aff_tree aff_inv, aff_var;
  bool speed = optimize_bb_for_speed_p (gimple_bb (at));

  if (inv_vars)
    *inv_vars = NULL;
  if (can_autoinc)
    *can_autoinc = false;
  if (inv_expr)
    *inv_expr = NULL;

  /* Check if we have enough precision to express the values of use.  */
  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    return infinite_cost;

  if (address_p
      || (use->iv->base_object
	  && cand->iv->base_object
	  && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
	  && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
    {
      /* Do not try to express address of an object with computation based
	 on address of a different object.  This may cause problems in rtl
	 level alias analysis (that does not expect this to be happening,
	 as this is illegal in C), and would be unlikely to be useful
	 anyway.  */
      if (use->iv->base_object
	  && cand->iv->base_object
	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
	return infinite_cost;
    }

  if (!get_computation_aff_1 (data->current_loop, at, use,
			      cand, &aff_inv, &aff_var, &rat)
      || !wi::fits_shwi_p (rat))
    return infinite_cost;

  ratio = rat.to_shwi ();
  if (address_p)
    {
      cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
			       inv_vars, inv_expr, can_autoinc, speed);
      return get_scaled_computation_cost_at (data, at, cost);
    }

  bool simple_inv = (aff_combination_const_p (&aff_inv)
		     || aff_combination_singleton_var_p (&aff_inv));
  tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
  aff_combination_convert (&aff_inv, signed_type);
  if (!aff_combination_zero_p (&aff_inv))
    comp_inv = aff_combination_to_tree (&aff_inv);

  cost = force_var_cost (data, comp_inv, inv_vars);
  if (comp_inv && inv_expr && !simple_inv)
    {
      *inv_expr = get_loop_invariant_expr (data, comp_inv);
      /* Clear depends on.  */
      if (*inv_expr != NULL && inv_vars && *inv_vars)
	bitmap_clear (*inv_vars);

      cost.cost = adjust_setup_cost (data, cost.cost);
      /* Record setup cost in scratch field.  */
      cost.scratch = cost.cost;
    }
  /* Cost of constant integer can be covered when adding invariant part to
     variant part.  */
  else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
    cost = no_cost;

  /* Need type narrowing to represent use with cand.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      machine_mode outer_mode = TYPE_MODE (utype);
      machine_mode inner_mode = TYPE_MODE (ctype);
      cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
    }

  /* Turn a + i * (-c) into a - i * c.  */
  if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
    aratio = -ratio;
  else
    aratio = ratio;

  if (aratio != 1)
    cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);

  /* TODO: We may also need to check if we can compute a + i * 4 in one
     instruction.  */
  /* Need to add up the invariant and variant parts.  */
  if (comp_inv && !integer_zerop (comp_inv))
    cost += add_cost (speed, TYPE_MODE (utype));

  return get_scaled_computation_cost_at (data, at, cost);
}
/* Determines cost of computing the use in GROUP with CAND in a generic
   expression.  */

static bool
determine_group_iv_cost_generic (struct ivopts_data *data,
				 struct iv_group *group, struct iv_cand *cand)
{
  comp_cost cost;
  iv_inv_expr_ent *inv_expr = NULL;
  bitmap inv_vars = NULL, inv_exprs = NULL;
  struct iv_use *use = group->vuses[0];

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
    cost = no_cost;
  else
    cost = get_computation_cost (data, use, cand, false,
				 &inv_vars, NULL, &inv_expr);

  if (inv_expr)
    {
      inv_exprs = BITMAP_ALLOC (NULL);
      bitmap_set_bit (inv_exprs, inv_expr->id);
    }
  set_group_iv_cost (data, group, cand, cost, inv_vars,
		     NULL_TREE, ERROR_MARK, inv_exprs);
  return !cost.infinite_cost_p ();
}
/* Determines cost of computing uses in GROUP with CAND in addresses.  */

static bool
determine_group_iv_cost_address (struct ivopts_data *data,
				 struct iv_group *group, struct iv_cand *cand)
{
  unsigned i;
  bitmap inv_vars = NULL, inv_exprs = NULL;
  bool can_autoinc;
  iv_inv_expr_ent *inv_expr = NULL;
  struct iv_use *use = group->vuses[0];
  comp_cost sum_cost = no_cost, cost;

  cost = get_computation_cost (data, use, cand, true,
			       &inv_vars, &can_autoinc, &inv_expr);

  if (inv_expr)
    {
      inv_exprs = BITMAP_ALLOC (NULL);
      bitmap_set_bit (inv_exprs, inv_expr->id);
    }
  sum_cost = cost;
  if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
    {
      if (can_autoinc)
	sum_cost -= cand->cost_step;
      /* If we generated the candidate solely for exploiting autoincrement
	 opportunities, and it turns out it can't be used, set the cost to
	 infinity to make sure we ignore it.  */
      else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
	sum_cost = infinite_cost;
    }

  /* Uses in a group can share setup code, so only add setup cost once.  */
  cost -= cost.scratch;
  /* Compute and add costs for the rest of the uses in this group.  */
  for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
    {
      struct iv_use *next = group->vuses[i];

      /* TODO: We could skip computing cost for sub iv_use when it has the
	 same cost as the first iv_use, but the cost really depends on the
	 offset and where the iv_use is.  */
      cost = get_computation_cost (data, next, cand, true,
				   NULL, &can_autoinc, &inv_expr);
      if (inv_expr)
	{
	  if (!inv_exprs)
	    inv_exprs = BITMAP_ALLOC (NULL);

	  bitmap_set_bit (inv_exprs, inv_expr->id);
	}
      sum_cost += cost;
    }
  set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
		     NULL_TREE, ERROR_MARK, inv_exprs);

  return !sum_cost.infinite_cost_p ();
}
/* Computes value of candidate CAND at position AT in iteration NITER, and
   stores it to VAL.  */

static void
cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
	       aff_tree *val)
{
  aff_tree step, delta, nit;
  struct iv *iv = cand->iv;
  tree type = TREE_TYPE (iv->base);
  tree steptype;

  if (POINTER_TYPE_P (type))
    steptype = sizetype;
  else
    steptype = unsigned_type_for (type);

  tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
  aff_combination_convert (&step, steptype);
  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
  aff_combination_convert (&nit, steptype);
  aff_combination_mult (&nit, &step, &delta);
  if (stmt_after_increment (loop, cand, at))
    aff_combination_add (&delta, &step);

  tree_to_aff_combination (iv->base, type, val);
  if (!POINTER_TYPE_P (type))
    aff_combination_convert (val, steptype);
  aff_combination_add (val, &delta);
}
/* Returns period of induction variable iv.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  type = unsigned_type_for (TREE_TYPE (step));
  /* Period of the iv is lcm (step, type_range) / step - 1,
     i.e., N * type_range / step - 1.  Since type range is a power
     of two, N == step >> num_of_ending_zeros_binary (step),
     so the final result is

       (type_range >> num_of_ending_zeros_binary (step)) - 1.  */
  pow2div = num_ending_zeros (step);

  period = build_low_bits_mask (type,
				(TYPE_PRECISION (type)
				 - tree_to_uhwi (pow2div)));

  return period;
}
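/* Illustrative example (not in the original source): for an iv with step 4
   in a 32-bit unsigned type, num_ending_zeros (step) is 2, so the period
   computed above is the mask of the low 30 bits, i.e. 2^30 - 1 ==
   0x3fffffff.  */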
/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  ex_bb = gimple_bb (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}
/* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
   we only detect the situation that BASE = SOMETHING + OFFSET, where the
   calculation is performed in a non-wrapping type.

   TODO: More generally, we could test for the situation that
	 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
	 This would require knowing the sign of OFFSET.  */

static bool
difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
{
  enum tree_code code;
  tree e1, e2;
  aff_tree aff_e1, aff_e2, aff_offset;

  if (!nowrap_type_p (TREE_TYPE (base)))
    return false;

  base = expand_simple_operations (base);

  if (TREE_CODE (base) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (base);

      if (gimple_code (stmt) != GIMPLE_ASSIGN)
	return false;

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
	return false;

      e1 = gimple_assign_rhs1 (stmt);
      e2 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      code = TREE_CODE (base);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
	return false;
      e1 = TREE_OPERAND (base, 0);
      e2 = TREE_OPERAND (base, 1);
    }

  /* Use affine expansion as deeper inspection to prove the equality.  */
  tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
				  &aff_e2, &data->name_expansion_cache);
  tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
				  &aff_offset, &data->name_expansion_cache);
  aff_combination_scale (&aff_offset, -1);
  switch (code)
    {
    case PLUS_EXPR:
      aff_combination_add (&aff_e2, &aff_offset);
      if (aff_combination_zero_p (&aff_e2))
	return true;

      tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
				      &aff_e1, &data->name_expansion_cache);
      aff_combination_add (&aff_e1, &aff_offset);
      return aff_combination_zero_p (&aff_e1);

    case POINTER_PLUS_EXPR:
      aff_combination_add (&aff_e2, &aff_offset);
      return aff_combination_zero_p (&aff_e2);

    default:
      return false;
    }
}
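/* Illustrative example (not in the original source): if BASE is an SSA name
   defined by "base = something + off" in a non-wrapping type and OFFSET
   expands to the same affine combination as "off", the PLUS_EXPR case above
   proves that BASE - OFFSET equals "something" and therefore cannot
   overflow.  */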
/* Tries to replace loop exit by one formulated in terms of a LT_EXPR
   comparison with CAND.  NITER describes the number of iterations of
   the loops.  If successful, the comparison in COMP_P is altered accordingly.

   We aim to handle the following situation: a loop iterating an index i
   from a while i < b, with a pointer p starting at p_0 = base + a and
   advanced in lock-step with i.

   Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
   We aim to optimize this to a loop exiting on the pointer instead:

     while (p < p_0 - a + b);

   This preserves the correctness, since the pointer arithmetic does not
   overflow.  More precisely:

   1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is
      no overflow in computing it or the values of p.
   2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
      overflow.  To prove this, we use the fact that p_0 = base + a.  */

static bool
iv_elimination_compare_lt (struct ivopts_data *data,
			   struct iv_cand *cand, enum tree_code *comp_p,
			   struct tree_niter_desc *niter)
{
  tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
  struct aff_tree nit, tmpa, tmpb;
  enum tree_code comp;
  HOST_WIDE_INT step;

  /* We need to know that the candidate induction variable does not overflow.
     While more complex analysis may be used to prove this, for now just
     check that the variable appears in the original program and that it
     is computed in a type that guarantees no overflows.  */
  cand_type = TREE_TYPE (cand->iv->base);
  if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
    return false;

  /* Make sure that the loop iterates till the loop bound is hit, as otherwise
     the calculation of the BOUND could overflow, making the comparison
     invalid.  */
  if (!data->loop_single_exit_p)
    return false;

  /* We need to be able to decide whether candidate is increasing or decreasing
     in order to choose the right comparison operator.  */
  if (!cst_and_fits_in_hwi (cand->iv->step))
    return false;
  step = int_cst_value (cand->iv->step);

  /* Check that the number of iterations matches the expected pattern:
     a + 1 > b ? 0 : b - a - 1.  */
  mbz = niter->may_be_zero;
  if (TREE_CODE (mbz) == GT_EXPR)
    {
      /* Handle a + 1 > b.  */
      tree op0 = TREE_OPERAND (mbz, 0);
      if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
	{
	  a = TREE_OPERAND (op0, 0);
	  b = TREE_OPERAND (mbz, 1);
	}
      else
	return false;
    }
  else if (TREE_CODE (mbz) == LT_EXPR)
    {
      tree op1 = TREE_OPERAND (mbz, 1);

      /* Handle b < a + 1.  */
      if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
	{
	  a = TREE_OPERAND (op1, 0);
	  b = TREE_OPERAND (mbz, 0);
	}
      else
	return false;
    }
  else
    return false;

  /* Expected number of iterations is B - A - 1.  Check that it matches
     the actual number, i.e., that B - A - NITER = 1.  */
  tree_to_aff_combination (niter->niter, nit_type, &nit);
  tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
  tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
  aff_combination_scale (&nit, -1);
  aff_combination_scale (&tmpa, -1);
  aff_combination_add (&tmpb, &tmpa);
  aff_combination_add (&tmpb, &nit);
  if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
    return false;

  /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
     overflow.  */
  offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
			cand->iv->step,
			fold_convert (TREE_TYPE (cand->iv->step), a));
  if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
    return false;

  /* Determine the new comparison operator.  */
  comp = step < 0 ? GT_EXPR : LT_EXPR;
  if (*comp_p == NE_EXPR)
    *comp_p = comp;
  else if (*comp_p == EQ_EXPR)
    *comp_p = invert_tree_comparison (comp, false);
  else
    gcc_unreachable ();

  return true;
}
/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND, and the
   comparison operator to COMP.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
		  struct iv_use *use, struct iv_cand *cand, tree *bound,
		  enum tree_code *comp)
{
  basic_block ex_bb;
  edge exit;
  tree period;
  struct loop *loop = data->current_loop;
  aff_tree bnd;
  struct tree_niter_desc *desc = NULL;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.
     TODO: extend to other conditions inside loop body.  */
  ex_bb = gimple_bb (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || gimple_code (use->stmt) != GIMPLE_COND
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  desc = niter_for_exit (data, exit);
  if (!desc)
    return false;

  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);

  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (desc->niter) == INTEGER_CST)
    {
      /* See cand_value_at.  */
      if (stmt_after_increment (loop, cand, use->stmt))
	{
	  if (!tree_int_cst_lt (desc->niter, period))
	    return false;
	}
      else
	{
	  if (tree_int_cst_lt (period, desc->niter))
	    return false;
	}
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else
    {
      widest_int period_value, max_niter;

      max_niter = desc->max;
      if (stmt_after_increment (loop, cand, use->stmt))
	max_niter += 1;
      period_value = wi::to_widest (period);
      if (wi::gtu_p (max_niter, period_value))
	{
	  /* See if we can take advantage of inferred loop bound
	     information.  */
	  if (data->loop_single_exit_p)
	    {
	      if (!max_loop_iterations (loop, &max_niter))
		return false;
	      /* The loop bound is already adjusted by adding 1.  */
	      if (wi::gtu_p (max_niter, period_value))
		return false;
	    }
	  else
	    return false;
	}
    }

  cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);

  *bound = fold_convert (TREE_TYPE (cand->iv->base),
			 aff_combination_to_tree (&bnd));
  *comp = iv_elimination_compare (data, use);

  /* It is unlikely that computing the number of iterations using division
     would be more profitable than keeping the original induction variable.  */
  if (expression_expensive_p (*bound))
    return false;

  /* Sometimes, it is possible to handle the situation that the number of
     iterations may be zero unless additional assumptions hold by using <
     instead of != in the exit condition.

     TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
	   base the exit condition on it.  However, that is often too
	   expensive.  */
  if (!integer_zerop (desc->may_be_zero))
    return iv_elimination_compare_lt (data, cand, comp, desc);

  return true;
}
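/* Illustrative example (not in the original source): a candidate with an
   8-bit unsigned iv and step 1 has period 255; if the exit above is known to
   be reached after at most 100 iterations, the candidate value at the exit
   is not repeated within the loop, so the exit test can be rewritten as a
   comparison against that value.  */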
/* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
   be copied, if it is used in the loop body and DATA->body_includes_call.  */

static int
parm_decl_cost (struct ivopts_data *data, tree bound)
{
  tree sbound = bound;
  STRIP_NOPS (sbound);

  if (TREE_CODE (sbound) == SSA_NAME
      && SSA_NAME_IS_DEFAULT_DEF (sbound)
      && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
      && data->body_includes_call)
    return COSTS_N_INSNS (1);

  return 0;
}
/* Determines cost of computing the use in GROUP with CAND in a condition.  */

static bool
determine_group_iv_cost_cond (struct ivopts_data *data,
			      struct iv_group *group, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap inv_exprs = NULL;
  bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
  comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
  enum comp_iv_rewrite rewrite_type;
  iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
  tree *control_var, *bound_cst;
  enum tree_code comp = ERROR_MARK;
  struct iv_use *use = group->vuses[0];

  /* Extract condition operands.  */
  rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
					&bound_cst, NULL, &cmp_iv);
  gcc_assert (rewrite_type != COMP_IV_NA);

  /* Try iv elimination.  */
  if (rewrite_type == COMP_IV_ELIM
      && may_eliminate_iv (data, use, cand, &bound, &comp))
    {
      elim_cost = force_var_cost (data, bound, &inv_vars_elim);
      if (elim_cost.cost == 0)
	elim_cost.cost = parm_decl_cost (data, bound);
      else if (TREE_CODE (bound) == INTEGER_CST)
	elim_cost.cost = 0;
      /* If we replace a loop condition 'i < n' with 'p < base + n',
	 inv_vars_elim will have 'base' and 'n' set, which implies that both
	 'base' and 'n' will be live during the loop.  More likely,
	 'base + n' will be loop invariant, resulting in only one live value
	 during the loop.  So in that case we clear inv_vars_elim and set
	 inv_expr_elim instead.  */
      if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
	{
	  inv_expr_elim = get_loop_invariant_expr (data, bound);
	  bitmap_clear (inv_vars_elim);
	}
      /* The bound is a loop invariant, so it will be only computed
	 once.  */
      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
    }

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite!  */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
	  || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost -= 1;

  express_cost = get_computation_cost (data, use, cand, false,
				       &inv_vars_express, NULL,
				       &inv_expr_express);
  if (cmp_iv != NULL)
    find_inv_vars (data, &cmp_iv->base, &inv_vars_express);

  /* Count the cost of the original bound as well.  */
  bound_cost = force_var_cost (data, *bound_cst, NULL);
  if (bound_cost.cost == 0)
    bound_cost.cost = parm_decl_cost (data, *bound_cst);
  else if (TREE_CODE (*bound_cst) == INTEGER_CST)
    bound_cost.cost = 0;
  express_cost += bound_cost;

  /* Choose the better approach, preferring the eliminated IV.  */
  if (elim_cost <= express_cost)
    {
      cost = elim_cost;
      inv_vars = inv_vars_elim;
      inv_vars_elim = NULL;
      inv_expr = inv_expr_elim;
    }
  else
    {
      cost = express_cost;
      inv_vars = inv_vars_express;
      inv_vars_express = NULL;
      bound = NULL_TREE;
      comp = ERROR_MARK;
      inv_expr = inv_expr_express;
    }

  if (inv_expr)
    {
      inv_exprs = BITMAP_ALLOC (NULL);
      bitmap_set_bit (inv_exprs, inv_expr->id);
    }
  set_group_iv_cost (data, group, cand, cost,
		     inv_vars, bound, comp, inv_exprs);

  if (inv_vars_elim)
    BITMAP_FREE (inv_vars_elim);
  if (inv_vars_express)
    BITMAP_FREE (inv_vars_express);

  return !cost.infinite_cost_p ();
}
/* Determines cost of computing uses in GROUP with CAND.  Returns false
   if USE cannot be represented with CAND.  */

static bool
determine_group_iv_cost (struct ivopts_data *data,
			 struct iv_group *group, struct iv_cand *cand)
{
  switch (group->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_group_iv_cost_generic (data, group, cand);

    case USE_REF_ADDRESS:
    case USE_PTR_ADDRESS:
      return determine_group_iv_cost_address (data, group, cand);

    case USE_COMPARE:
      return determine_group_iv_cost_cond (data, group, cand);

    default:
      gcc_unreachable ();
    }
}
/* Return true if get_computation_cost indicates that autoincrement is
   a possibility for the pair of USE and CAND, false otherwise.  */

static bool
autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
			   struct iv_cand *cand)
{
  if (!address_p (use->type))
    return false;

  bool can_autoinc = false;
  get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
  return can_autoinc;
}
/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
   use that allows autoincrement, and set their AINC_USE if possible.  */

static void
set_autoinc_for_original_candidates (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < data->vcands.length (); i++)
    {
      struct iv_cand *cand = data->vcands[i];
      struct iv_use *closest_before = NULL;
      struct iv_use *closest_after = NULL;
      if (cand->pos != IP_ORIGINAL)
	continue;

      for (j = 0; j < data->vgroups.length (); j++)
	{
	  struct iv_group *group = data->vgroups[j];
	  struct iv_use *use = group->vuses[0];
	  unsigned uid = gimple_uid (use->stmt);

	  if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
	    continue;

	  if (uid < gimple_uid (cand->incremented_at)
	      && (closest_before == NULL
		  || uid > gimple_uid (closest_before->stmt)))
	    closest_before = use;

	  if (uid > gimple_uid (cand->incremented_at)
	      && (closest_after == NULL
		  || uid < gimple_uid (closest_after->stmt)))
	    closest_after = use;
	}

      if (closest_before != NULL
	  && autoinc_possible_for_pair (data, closest_before, cand))
	cand->ainc_use = closest_before;
      else if (closest_after != NULL
	       && autoinc_possible_for_pair (data, closest_after, cand))
	cand->ainc_use = closest_after;
    }
}
/* Relate compare use with all candidates.  */

static void
relate_compare_use_with_all_cands (struct ivopts_data *data)
{
  unsigned i, count = data->vcands.length ();
  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];

      if (group->type == USE_COMPARE)
	bitmap_set_range (group->related_cands, 0, count);
    }
}
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_iv_candidate_for_bivs (data);

  /* Add induction variables derived from uses.  */
  add_iv_candidate_for_groups (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);

  /* Relate compare iv_use with all candidates.  */
  if (!data->consider_all_candidates)
    relate_compare_use_with_all_cands (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;

      fprintf (dump_file, "\n<Important Candidates>:\t");
      for (i = 0; i < data->vcands.length (); i++)
	if (data->vcands[i]->important)
	  fprintf (dump_file, " %d,", data->vcands[i]->id);
      fprintf (dump_file, "\n");

      fprintf (dump_file, "\n<Group, Cand> Related:\n");
      for (i = 0; i < data->vgroups.length (); i++)
	{
	  struct iv_group *group = data->vgroups[i];

	  if (group->related_cands)
	    {
	      fprintf (dump_file, " Group %d:\t", group->id);
	      dump_bitmap (dump_file, group->related_cands);
	    }
	}
      fprintf (dump_file, "\n");
    }
}
/* Determines costs of computing use of iv with an iv candidate.  */

static void
determine_group_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_cand *cand;
  struct iv_group *group;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];

      if (data->consider_all_candidates)
	{
	  for (j = 0; j < data->vcands.length (); j++)
	    {
	      cand = data->vcands[j];
	      determine_group_iv_cost (data, group, cand);
	    }
	}
      else
	{
	  bitmap_iterator bi;

	  EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
	    {
	      cand = data->vcands[j];
	      if (!determine_group_iv_cost (data, group, cand))
		bitmap_set_bit (to_clear, j);
	    }

	  /* Remove the candidates for which the cost is infinite from
	     the list of related candidates.  */
	  bitmap_and_compl_into (group->related_cands, to_clear);
	  bitmap_clear (to_clear);
	}
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      /* Dump invariant variables.  */
      fprintf (dump_file, "\n<Invariant Vars>:\n");
      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  struct version_info *info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "Inv %d:\t", info->inv_id);
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, "%s\n",
		       info->has_nonlin_use ? "" : "\t(eliminable)");
	    }
	}

      /* Dump invariant expressions.  */
      fprintf (dump_file, "\n<Invariant Expressions>:\n");
      auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());

      for (hash_table<iv_inv_expr_hasher>::iterator it
	   = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
	   ++it)
	list.safe_push (*it);

      list.qsort (sort_iv_inv_expr_ent);

      for (i = 0; i < list.length (); ++i)
	{
	  fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
	  print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n<Group-candidate Costs>:\n");

      for (i = 0; i < data->vgroups.length (); i++)
	{
	  group = data->vgroups[i];

	  fprintf (dump_file, "Group %d:\n", i);
	  fprintf (dump_file, "  cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
	  for (j = 0; j < group->n_map_members; j++)
	    {
	      if (!group->cost_map[j].cand
		  || group->cost_map[j].cost.infinite_cost_p ())
		continue;

	      fprintf (dump_file, "  %d\t%" PRId64 "\t%d\t",
		       group->cost_map[j].cand->id,
		       group->cost_map[j].cost.cost,
		       group->cost_map[j].cost.complexity);
	      if (!group->cost_map[j].inv_exprs
		  || bitmap_empty_p (group->cost_map[j].inv_exprs))
		fprintf (dump_file, "NIL;\t");
	      else
		bitmap_print (dump_file,
			      group->cost_map[j].inv_exprs, "", ";\t");
	      if (!group->cost_map[j].inv_vars
		  || bitmap_empty_p (group->cost_map[j].inv_vars))
		fprintf (dump_file, "NIL;\n");
	      else
		bitmap_print (dump_file,
			      group->cost_map[j].inv_vars, "", "\n");
	    }

	  fprintf (dump_file, "\n");
	}
      fprintf (dump_file, "\n");
    }
}
/* Determines cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  comp_cost cost_base;
  int64_t cost, cost_step;
  tree base;

  gcc_assert (cand->iv != NULL);

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  /* It will be exceptional that the iv register happens to be initialized with
     the proper value at no cost.  In general, there will at least be a regcopy
     or a const set.  */
  if (cost_base.cost == 0)
    cost_base.cost = COSTS_N_INSNS (1);
  cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));

  cost = cost_step + adjust_setup_cost (data, cost_base.cost);

  /* Prefer the original ivs unless we may gain something by replacing it.
     The reason is to make debugging simpler; so this is not relevant for
     artificial ivs created by other optimization passes.  */
  if (cand->pos != IP_ORIGINAL
      || !SSA_NAME_VAR (cand->var_before)
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cost++;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
  cand->cost_step = cost_step;
}
/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "<Candidate Costs>:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < data->vcands.length (); i++)
    {
      struct iv_cand *cand = data->vcands[i];

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}
/* Estimate register pressure for loop having N_INVS invariants and N_CANDS
   induction variables.  Note N_INVS includes both invariant variables and
   invariant expressions.  */

static unsigned
ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
			      unsigned n_cands)
{
  unsigned cost;
  unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
  unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
  bool speed = data->speed;

  /* If there is a call in the loop body, the call-clobbered registers
     are not available for loop invariants.  */
  if (data->body_includes_call)
    available_regs = available_regs - target_clobbered_regs;

  /* If we have enough registers.  */
  if (regs_needed + target_res_regs < available_regs)
    cost = n_new;
  /* If close to running out of registers, try to preserve them.  */
  else if (regs_needed <= available_regs)
    cost = target_reg_cost [speed] * regs_needed;
  /* If we run out of available registers but the number of candidates
     does not, we penalize extra registers using target_spill_cost.  */
  else if (n_cands <= available_regs)
    cost = target_reg_cost [speed] * available_regs
	   + target_spill_cost [speed] * (regs_needed - available_regs);
  /* If the number of candidates runs out of available registers, we penalize
     extra candidate registers using target_spill_cost * 2.  Because it is
     more expensive to spill an induction variable than an invariant.  */
  else
    cost = target_reg_cost [speed] * available_regs
	   + target_spill_cost [speed] * (n_cands - available_regs) * 2
	   + target_spill_cost [speed] * (regs_needed - n_cands);

  /* Finally, add the number of candidates, so that we prefer eliminating
     induction variables if possible.  */
  return cost + n_cands;
}
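/* Illustrative example (not in the original source): with available_regs
   == 10, regs_needed == 12 and n_cands == 4, the third branch above applies
   and the estimate is 10 * target_reg_cost[speed]
   + 2 * target_spill_cost[speed], plus the final "+ n_cands" bias that
   favours sets with fewer induction variables.  */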
/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gphi *phi;
  gphi_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "<Global Costs>:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
    }

  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      op = PHI_RESULT (phi);

      if (virtual_operand_p (op))
	continue;

      if (get_iv (data, op))
	continue;

      if (!POINTER_TYPE_P (TREE_TYPE (op))
	  && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
	continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
	n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
	fprintf (dump_file, "  %d\t%d\n", j,
		 ivopts_estimate_reg_pressure (data, 0, j));
      fprintf (dump_file, "\n");
    }
}
/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  if (!a)
    return false;

  if (!b)
    return true;

  if (a->cost < b->cost)
    return true;

  if (b->cost < a->cost)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}
/* Compare if A is a more expensive cost pair than B.  Return 1, 0 and -1
   for more expensive, equal and cheaper respectively.  */

static int
compare_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  if (cheaper_cost_pair (a, b))
    return -1;
  if (cheaper_cost_pair (b, a))
    return 1;

  return 0;
}

/* Returns the candidate by which USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
{
  return ivs->cand_for_group[group->id];
}
/* Computes the cost field of IVS structure.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  comp_cost cost = ivs->cand_use_cost;

  cost += ivs->cand_cost;
  cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
  ivs->cost = cost;
}

/* Remove use of invariants in set INVS by decreasing counters in N_INV_USES
   and updating IVS.  */

static void
iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  gcc_assert (n_inv_uses != NULL);
  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      n_inv_uses[iid]--;
      if (n_inv_uses[iid] == 0)
	ivs->n_invs--;
    }
}
5839 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5840 struct iv_group
*group
)
5842 unsigned gid
= group
->id
, cid
;
5843 struct cost_pair
*cp
;
5845 cp
= ivs
->cand_for_group
[gid
];
5851 ivs
->cand_for_group
[gid
] = NULL
;
5852 ivs
->n_cand_uses
[cid
]--;
5854 if (ivs
->n_cand_uses
[cid
] == 0)
5856 bitmap_clear_bit (ivs
->cands
, cid
);
5858 ivs
->cand_cost
-= cp
->cand
->cost
;
5859 iv_ca_set_remove_invs (ivs
, cp
->cand
->inv_vars
, ivs
->n_inv_var_uses
);
5860 iv_ca_set_remove_invs (ivs
, cp
->cand
->inv_exprs
, ivs
->n_inv_expr_uses
);
5863 ivs
->cand_use_cost
-= cp
->cost
;
5864 iv_ca_set_remove_invs (ivs
, cp
->inv_vars
, ivs
->n_inv_var_uses
);
5865 iv_ca_set_remove_invs (ivs
, cp
->inv_exprs
, ivs
->n_inv_expr_uses
);
5866 iv_ca_recount_cost (data
, ivs
);
/* Add use of invariants in set INVS by increasing counters in N_INV_USES and
   updating IVS.  */

static void
iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  gcc_assert (n_inv_uses != NULL);
  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      n_inv_uses[iid]++;
      if (n_inv_uses[iid] == 1)
	ivs->n_invs++;
    }
}
/* Set cost pair for GROUP in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_group *group, struct cost_pair *cp)
{
  unsigned gid = group->id, cid;

  if (ivs->cand_for_group[gid] == cp)
    return;

  if (ivs->cand_for_group[gid])
    iv_ca_set_no_cp (data, ivs, group);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_groups--;
      ivs->cand_for_group[gid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
	{
	  bitmap_set_bit (ivs->cands, cid);
	  ivs->n_cands++;
	  ivs->cand_cost += cp->cand->cost;
	  iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
	  iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
	}

      ivs->cand_use_cost += cp->cost;
      iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
      iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
      iv_ca_recount_cost (data, ivs);
    }
}
/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  Consider all important candidates if candidates in
   set IVS don't give any result.  */

static void
iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
		 struct iv_group *group)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  unsigned i;
  struct iv_cand *cand;

  gcc_assert (ivs->upto >= group->id);
  ivs->upto++;
  ivs->bad_groups++;

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = data->vcands[i];
      cp = get_group_iv_cost (data, group, cand);
      if (cheaper_cost_pair (cp, best_cp))
	best_cp = cp;
    }

  if (best_cp == NULL)
    {
      EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
	{
	  cand = data->vcands[i];
	  cp = get_group_iv_cost (data, group, cand);
	  if (cheaper_cost_pair (cp, best_cp))
	    best_cp = cp;
	}
    }

  iv_ca_set_cp (data, ivs, group, best_cp);
}
/* Get cost for assignment IVS.  */

static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
  /* This was a conditional expression but it triggered a bug in
     Sun C 5.5.  */
  if (ivs->bad_groups)
    return infinite_cost;
  else
    return ivs->cost;
}
/* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
   than OLD_CP.  Return 1, 0 and -1 for more, equal and fewer invariants
   respectively.  */

static int
iv_ca_compare_deps (struct ivopts_data *data, struct iv_ca *ivs,
		    struct iv_group *group, struct cost_pair *old_cp,
		    struct cost_pair *new_cp)
{
  gcc_assert (old_cp && new_cp && old_cp != new_cp);
  unsigned old_n_invs = ivs->n_invs;
  iv_ca_set_cp (data, ivs, group, new_cp);
  unsigned new_n_invs = ivs->n_invs;
  iv_ca_set_cp (data, ivs, group, old_cp);

  return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
}
/* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
   it to the list of changes NEXT.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
		 struct cost_pair *new_cp, struct iv_ca_delta *next)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->group = group;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next = next;

  return change;
}
/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next; last = last->next)
    continue;
  last->next = l2;

  return l1;
}
/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;

  for (act = delta; act; act = next)
    {
      next = act->next;
      act->next = prev;
      prev = act;

      std::swap (act->old_cp, act->new_cp);
    }

  return prev;
}

/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
		    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
      iv_ca_set_cp (data, ivs, act->group, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}
/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}

/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next;
      free (act);
    }

  *delta = NULL;
}
/* Allocates new iv candidates assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_groups = 0;
  nw->cand_for_group = XCNEWVEC (struct cost_pair *,
				 data->vgroups.length ());
  nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_invs = 0;
  nw->cand_use_cost = no_cost;
  nw->cand_cost = 0;
  nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
  nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
  nw->cost = no_cost;

  return nw;
}

/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_group);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_inv_var_uses);
  free ((*ivs)->n_inv_expr_uses);
  free (*ivs);
  *ivs = NULL;
}
/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost: %" PRId64 " (complexity %d)\n", cost.cost,
	   cost.complexity);
  fprintf (file, "  cand_cost: %" PRId64 "\n  cand_group_cost: "
	   "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
	   ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ","\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_group *group = data->vgroups[i];
      struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
      if (cp)
	fprintf (file, "   group:%d --> iv_cand:%d, cost=("
		 "%" PRId64 ",%d)\n", group->id, cp->cand->id,
		 cp->cost.cost, cp->cost.complexity);
      else
	fprintf (file, "   group:%d --> ??\n", group->id);
    }

  const char *pref = "";
  fprintf (file, "  invariant variables: ");
  for (i = 1; i <= data->max_inv_var_id; i++)
    if (ivs->n_inv_var_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }

  pref = "";
  fprintf (file, "\n  invariant expressions: ");
  for (i = 1; i <= data->max_inv_expr_id; i++)
    if (ivs->n_inv_expr_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }

  fprintf (file, "\n\n");
}
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  MIN_NCAND is a flag.  When it is true
   the function will try to find a solution with minimal iv candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_group *group;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      group = data->vgroups[i];
      old_cp = iv_ca_cand_for_group (ivs, group);

      if (old_cp
	  && old_cp->cand == cand)
	continue;

      new_cp = get_group_iv_cost (data, group, cand);
      if (!new_cp)
	continue;

      if (!min_ncand)
	{
	  int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
	  /* Skip if new_cp depends on more invariants.  */
	  if (cmp_invs > 0)
	    continue;

	  int cmp_cost = compare_cost_pair (new_cp, old_cp);
	  /* Skip if new_cp is not cheaper.  */
	  if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
	    continue;
	}

      *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  START is
   the candidate with which we start narrowing.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_cand *start,
	      struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_group *group;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost, best_cost, acost;

  *delta = NULL;
  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];

      old_cp = iv_ca_cand_for_group (ivs, group);
      if (old_cp->cand != cand)
	continue;

      best_cost = iv_ca_cost (ivs);
      /* Start narrowing with START.  */
      new_cp = get_group_iv_cost (data, group, start);

      if (data->consider_all_candidates)
	{
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id || (start && ci == start->id))
		continue;

	      cnd = data->vcands[ci];

	      cp = get_group_iv_cost (data, group, cnd);
	      if (!cp)
		continue;

	      iv_ca_set_cp (data, ivs, group, cp);
	      acost = iv_ca_cost (ivs);

	      if (acost < best_cost)
		{
		  best_cost = acost;
		  new_cp = cp;
		}
	    }
	}
      else
	{
	  EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id || (start && ci == start->id))
		continue;

	      cnd = data->vcands[ci];

	      cp = get_group_iv_cost (data, group, cnd);
	      if (!cp)
		continue;

	      iv_ca_set_cp (data, ivs, group, cp);
	      acost = iv_ca_cost (ivs);

	      if (acost < best_cost)
		{
		  best_cost = acost;
		  new_cp = cp;
		}
	    }
	}
      /* Restore to old cp for use.  */
      iv_ca_set_cp (data, ivs, group, old_cp);

      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return infinite_cost;
	}

      *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try optimizing the set of candidates IVS by removing candidates other
   than EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = data->vcands[i];

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
/* Check if CAND_IDX is a candidate other than OLD_CAND and has
   cheaper local cost for GROUP than BEST_CP.  Return pointer to
   the corresponding cost_pair, otherwise just return BEST_CP.  */

static struct cost_pair *
cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
			unsigned int cand_idx, struct iv_cand *old_cand,
			struct cost_pair *best_cp)
{
  struct iv_cand *cand;
  struct cost_pair *cp;

  gcc_assert (old_cand != NULL && best_cp != NULL);
  if (cand_idx == old_cand->id)
    return best_cp;

  cand = data->vcands[cand_idx];
  cp = get_group_iv_cost (data, group, cand);
  if (cp != NULL && cheaper_cost_pair (cp, best_cp))
    return cp;

  return best_cp;
}
/* Try breaking local optimal fixed-point for IVS by replacing candidates
   which are used by more than one iv use.  For each of those candidates,
   this function tries to represent iv uses under that candidate using
   other ones with lower local cost, then tries to prune the new set.
   If the new set has lower cost, it returns the new cost after recording
   candidate replacement in list DELTA.  */

static comp_cost
iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
	       struct iv_ca_delta **delta)
{
  bitmap_iterator bi, bj;
  unsigned int i, j, k;
  struct iv_cand *cand;
  comp_cost orig_cost, acost;
  struct iv_ca_delta *act_delta, *tmp_delta;
  struct cost_pair *old_cp, *best_cp = NULL;

  *delta = NULL;
  orig_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      if (ivs->n_cand_uses[i] == 1
	  || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
	continue;

      cand = data->vcands[i];

      act_delta = NULL;
      /*  Represent uses under current candidate using other ones with
	  lower local cost.  */
      for (j = 0; j < ivs->upto; j++)
	{
	  struct iv_group *group = data->vgroups[j];
	  old_cp = iv_ca_cand_for_group (ivs, group);

	  if (old_cp->cand != cand)
	    continue;

	  best_cp = old_cp;
	  if (data->consider_all_candidates)
	    for (k = 0; k < data->vcands.length (); k++)
	      best_cp = cheaper_cost_with_cand (data, group, k,
						old_cp->cand, best_cp);
	  else
	    EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
	      best_cp = cheaper_cost_with_cand (data, group, k,
						old_cp->cand, best_cp);

	  if (best_cp == old_cp)
	    continue;

	  act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
	}
      /* No need for further prune.  */
      if (!act_delta)
	continue;

      /* Prune the new candidate set.  */
      iv_ca_delta_commit (data, ivs, act_delta, true);
      acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
      iv_ca_delta_commit (data, ivs, act_delta, false);
      act_delta = iv_ca_delta_join (act_delta, tmp_delta);

      if (acost < orig_cost)
	{
	  *delta = act_delta;
	  return acost;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  return orig_cost;
}
/* Tries to extend the sets IVS in the best possible way in order to
   express the GROUP.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_group *group, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_group (data, ivs, group);
  best_cost = iv_ca_cost (ivs);
  cp = iv_ca_cand_for_group (ivs, group);
  if (cp)
    {
      best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, group);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
    {
      cand = data->vcands[i];

      if (originalp && cand->pos != IP_ORIGINAL)
	continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
	continue;

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_group_iv_cost (data, group, cand);
      if (!cp)
	continue;

      iv_ca_set_cp (data, ivs, group, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
			       true);
      iv_ca_set_no_cp (data, ivs, group);
      act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);

      if (act_cost < best_cost)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (best_cost.infinite_cost_p ())
    {
      for (i = 0; i < group->n_map_members; i++)
	{
	  cp = group->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important)
	    {
	      if (originalp && cand->pos == IP_ORIGINAL)
		continue;
	      if (!originalp && cand->iv->base_object == NULL_TREE)
		continue;
	    }

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, group, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
	  iv_ca_set_no_cp (data, ivs, group);
	  act_delta = iv_ca_delta_add (group,
				       iv_ca_cand_for_group (ivs, group),
				       cp, act_delta);

	  if (act_cost < best_cost)
	    {
	      best_cost = act_cost;

	      if (best_delta)
		iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !best_cost.infinite_cost_p ();
}
/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  unsigned i;
  struct iv_ca *ivs = iv_ca_new (data);

  for (i = 0; i < data->vgroups.length (); i++)
    if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
      {
	iv_ca_free (&ivs);
	return NULL;
      }

  return ivs;
}
/* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
   points to a bool variable; this function tries to break the local
   optimal fixed-point by replacing candidates in IVS if it's true.  */

static bool
try_improve_iv_set (struct ivopts_data *data,
		    struct iv_ca *ivs, bool *try_replace_p)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < data->vcands.length (); i++)
    {
      cand = data->vcands[i];

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      if (!best_delta && *try_replace_p)
	{
	  *try_replace_p = false;
	  /* So far the candidate selecting algorithm tends to choose fewer
	     IVs so that it can handle cases in which loops have many
	     variables but the best choice is often to use only one general
	     biv.  One weakness is it can't handle opposite cases, in which
	     different candidates should be chosen with respect to each use.
	     To solve the problem, we replace candidates in a manner described
	     by the comments of iv_ca_replace, thus give the general algorithm
	     a chance to break the local optimal fixed-point in these cases.  */
	  best_cost = iv_ca_replace (data, ivs, &best_delta);
	}

      if (!best_delta)
	return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);
  return best_cost == iv_ca_cost (ivs);
}

/* Attempts to find the optimal set of induction variables.  We do a simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;
  bool try_replace_p = true;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set, &try_replace_p))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  /* If the set has infinite_cost, it can't be optimal.  */
  if (iv_ca_cost (set).infinite_cost_p ())
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Overflow to infinite cost in try_improve_iv_set.\n");
      iv_ca_free (&set);
    }

  return set;
}
static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  comp_cost cost, origcost;
  struct iv_ca *set, *origset;

  /* Determine the cost based on a strategy that starts with original IVs,
     and try again using a strategy that prefers candidates not based
     on any memory object.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
	       origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
	       cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (origcost <= cost)
    {
      if (set)
	iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];
      group->selected = iv_ca_cand_for_group (set, group)->cand;
    }

  return set;
}

/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  struct iv_use *use;
  struct iv_group *group;
  bool after = false;

  gcc_assert (cand->iv != NULL);

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      use = find_interesting_uses_op (data, cand->var_after);
      group = data->vgroups[use->group_id];
      group->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}

/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = data->vcands[i];
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Selected IV set for loop %d",
	       data->current_loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
	       avg_loop_niter (data->current_loop));
      fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
	{
	  cand = data->vcands[i];
	  dump_cand (dump_file, cand);
	}
      fprintf (dump_file, "\n");
    }
}

/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  gassign *ass;
  gimple_stmt_iterator bsi;
  tree comp, type = get_use_type (use), tgt;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree op = NULL_TREE;
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
	  || stmt_code == MINUS_EXPR
	  || stmt_code == POINTER_PLUS_EXPR)
	{
	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs2 (use->stmt);
	  else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs1 (use->stmt);
	}

      if (op != NULL_TREE)
	{
	  if (expr_invariant_in_loop_p (data->current_loop, op))
	    return;
	  if (TREE_CODE (op) == SSA_NAME)
	    {
	      struct iv *iv = get_iv (data, op);
	      if (iv != NULL && integer_zerop (iv->step))
		return;
	    }
	}
    }

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  aff_tree aff_inv, aff_var;
  if (!get_computation_aff_1 (data->current_loop, use->stmt,
			      use, cand, &aff_inv, &aff_var))
    gcc_unreachable ();

  unshare_aff_combination (&aff_inv);
  unshare_aff_combination (&aff_var);
  /* Prefer the CSE opportunity over hoisting the loop invariant, by adding
     the offset last, so that iv_uses with different offsets can be CSEd.  */
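  /* For illustration only (not from any testcase): with uses like a[i] and
     a[i + 1], splitting out the constant offset lets both share the
     gimplified "a + i * sizeof (*a)" part and differ only in the constant
     added at the end, which a later CSE pass can exploit.  */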
  poly_widest_int offset = aff_inv.offset;
  aff_inv.offset = 0;

  gimple_seq stmt_list = NULL, seq = NULL;
  tree comp_op1 = aff_combination_to_tree (&aff_inv);
  tree comp_op2 = aff_combination_to_tree (&aff_var);
  gcc_assert (comp_op1 && comp_op2);

  comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
  gimple_seq_add_seq (&stmt_list, seq);
  comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
  gimple_seq_add_seq (&stmt_list, seq);

  if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
    std::swap (comp_op1, comp_op2);

  if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
    {
      comp = fold_build_pointer_plus (comp_op1,
				      fold_convert (sizetype, comp_op2));
      comp = fold_build_pointer_plus (comp,
				      wide_int_to_tree (sizetype, offset));
    }
  else
    {
      comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
			  fold_convert (TREE_TYPE (comp_op1), comp_op2));
      comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
			  wide_int_to_tree (TREE_TYPE (comp_op1), offset));
    }

  comp = fold_convert (type, comp);
  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
	  /* We can't allow re-allocating the stmt as it might be pointed
	     to still.  */
	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
	      >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand (comp, &seq, true, NULL);
      gimple_seq_add_seq (&stmt_list, seq);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
	{
	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
	  /* As this isn't a plain copy we have to reset alignment
	     information.  */
	  if (SSA_NAME_PTR_INFO (comp))
	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
	}
    }

  gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}

/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref
   uses the iv value before the update, so the reordering transformation
   requires adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

     t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
     iv2 = iv1 + 1;

     if (t < val)      (1)
       goto L;
     goto Head;

   Directly propagating t over to (1) would introduce an overlapping live
   range and thus increase register pressure.  This peephole transforms it
   into:

     iv2 = iv1 + 1;
     t = MEM_REF (base, iv2, 8, 8);
     if (t < val)
       goto L;
     goto Head;  */

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple *iv_update, *stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0);
      print_gimple_stmt (dump_file, use->stmt, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}

/* Return the alias pointer type that should be used for a MEM_REF
   associated with USE, which has type USE_PTR_ADDRESS.  */

static tree
get_alias_ptr_type_for_ptr_address (iv_use *use)
{
  gcall *call = as_a <gcall *> (use->stmt);
  switch (gimple_call_internal_fn (call))
    {
    case IFN_MASK_LOAD:
    case IFN_MASK_STORE:
    case IFN_MASK_LOAD_LANES:
    case IFN_MASK_STORE_LANES:
      /* The second argument contains the correct alias type.  */
      gcc_assert (use->op_p = gimple_call_arg_ptr (call, 0));
      return TREE_TYPE (gimple_call_arg (call, 1));

    default:
      gcc_unreachable ();
    }
}

/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
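  /* Illustrative sketch (not taken from the sources): when the candidate is
     a pointer iv walking the accessed object, passing its value at USE->stmt
     as BASE_HINT below lets create_mem_ref keep that value as the MEM_REF
     base rather than demoting it to a plain integer index expression.  */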
  tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
  tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree type = use->mem_type;
  tree alias_ptr_type;
  if (use->type == USE_PTR_ADDRESS)
    alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
  else
    {
      gcc_assert (type == TREE_TYPE (*use->op_p));
      unsigned int align = get_object_alignment (*use->op_p);
      if (align != TYPE_ALIGN (type))
	type = build_aligned_type (type, align);
      alias_ptr_type = reference_alias_ptr_type (*use->op_p);
    }
  tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
			     iv, base_hint, data->speed);

  if (use->type == USE_PTR_ADDRESS)
    {
      ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
      ref = fold_convert (get_use_type (use), ref);
      ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
				      true, GSI_SAME_STMT);
    }
  else
    copy_ref_info (ref, *use->op_p);

  *use->op_p = ref;
}

/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct iv_group *group = data->vgroups[use->group_id];
  struct cost_pair *cp = get_group_iv_cost (data, group, cand);

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Replacing exit test: ");
	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
	}
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      gcond *cond_stmt = as_a <gcond *> (use->stmt);
      gimple_cond_set_lhs (cond_stmt, var);
      gimple_cond_set_code (cond_stmt, compare);
      gimple_cond_set_rhs (cond_stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation_at (data->current_loop, use->stmt, use, cand);
  gcc_assert (comp != NULL_TREE);
  gcc_assert (use->op_p != NULL);
  *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
					 SSA_NAME_VAR (*use->op_p),
					 true, GSI_SAME_STMT);
}

/* Rewrite the groups using the selected induction variables.  */

static void
rewrite_groups (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];
      struct iv_cand *cand = group->selected;

      gcc_assert (cand);

      if (group->type == USE_NONLINEAR_EXPR)
	{
	  for (j = 0; j < group->vuses.length (); j++)
	    {
	      rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
	      update_stmt (group->vuses[j]->stmt);
	    }
	}
      else if (address_p (group->type))
	{
	  for (j = 0; j < group->vuses.length (); j++)
	    {
	      rewrite_use_address (data, group->vuses[j], cand);
	      update_stmt (group->vuses[j]->stmt);
	    }
	}
      else
	{
	  gcc_assert (group->type == USE_COMPARE);

	  for (j = 0; j < group->vuses.length (); j++)
	    {
	      rewrite_use_compare (data, group->vuses[j], cand);
	      update_stmt (group->vuses[j]->stmt);
	    }
	}
    }
}
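
/* As a rough illustration (a sketch, not from any particular testcase), the
   rewriting above may turn

     for (i = 0; i < n; i++)
       a[i] = 0;

   into a form equivalent to

     for (p = a; p != a + n; p++)
       *p = 0;

   after which the original 'i' has no remaining uses and is cleaned up by
   remove_unused_ivs below.  */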

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
{
  unsigned j;
  bitmap_iterator bi;

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->nonlin_use
	  && !info->preserve_biv)
	{
	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

	  tree def = info->iv->ssa_name;

	  if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
	    {
	      imm_use_iterator imm_iter;
	      use_operand_p use_p;
	      gimple *stmt;
	      int count = 0;

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  /* We just want to determine whether to do nothing
		     (count == 0), to substitute the computed
		     expression into a single use of the SSA DEF by
		     itself (count == 1), or to use a debug temp
		     because the SSA DEF is used multiple times or as
		     part of a larger expression (count > 1).  */
		  count++;
		  if (gimple_debug_bind_get_value (stmt) != def)
		    count++;

		  if (count > 1)
		    BREAK_FROM_IMM_USE_STMT (imm_iter);
		}

	      if (!count)
		continue;

	      struct iv_use dummy_use;
	      struct iv_cand *best_cand = NULL, *cand;
	      unsigned i, best_pref = 0, cand_pref;

	      memset (&dummy_use, 0, sizeof (dummy_use));
	      dummy_use.iv = info->iv;
	      for (i = 0; i < data->vgroups.length () && i < 64; i++)
		{
		  cand = data->vgroups[i]->selected;
		  if (cand == best_cand)
		    continue;
		  cand_pref = operand_equal_p (cand->iv->step,
					       info->iv->step, 0)
			      ? 4 : 0;
		  cand_pref
		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
		       == TYPE_MODE (TREE_TYPE (info->iv->base))
		       ? 2 : 0;
		  cand_pref
		    += TREE_CODE (cand->iv->base) == INTEGER_CST
		       ? 1 : 0;
		  if (best_cand == NULL || best_pref < cand_pref)
		    {
		      best_cand = cand;
		      best_pref = cand_pref;
		    }
		}

	      if (!best_cand)
		continue;

	      tree comp = get_computation_at (data->current_loop,
					      SSA_NAME_DEF_STMT (def),
					      &dummy_use, best_cand);
	      if (!comp)
		continue;

	      if (count > 1)
		{
		  tree vexpr = make_node (DEBUG_EXPR_DECL);
		  DECL_ARTIFICIAL (vexpr) = 1;
		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
		  if (SSA_NAME_VAR (def))
		    SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
		  else
		    SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
		  gdebug *def_temp
		    = gimple_build_debug_bind (vexpr, comp, NULL);
		  gimple_stmt_iterator gsi;

		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
		    gsi = gsi_after_labels (gimple_bb
					    (SSA_NAME_DEF_STMT (def)));
		  else
		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
		  comp = vexpr;
		}

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    SET_USE (use_p, comp);

		  update_stmt (stmt);
		}
	    }
	}
    }
}

/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for hash_map::traverse.  */

bool
free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
{
  free (value);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];

      for (j = 0; j < group->vuses.length (); j++)
	free (group->vuses[j]);
      group->vuses.release ();

      BITMAP_FREE (group->related_cands);
      for (j = 0; j < group->n_map_members; j++)
	{
	  if (group->cost_map[j].inv_vars)
	    BITMAP_FREE (group->cost_map[j].inv_vars);
	  if (group->cost_map[j].inv_exprs)
	    BITMAP_FREE (group->cost_map[j].inv_exprs);
	}

      free (group->cost_map);
      free (group);
    }
  data->vgroups.truncate (0);

  for (i = 0; i < data->vcands.length (); i++)
    {
      struct iv_cand *cand = data->vcands[i];

      if (cand->inv_vars)
	BITMAP_FREE (cand->inv_vars);
      if (cand->inv_exprs)
	BITMAP_FREE (cand->inv_exprs);
      free (cand);
    }
  data->vcands.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
				     data->version_info_size);
    }

  data->max_inv_var_id = 0;
  data->max_inv_expr_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();

  data->iv_common_cand_tab->empty ();
  data->iv_common_cands.truncate (0);
}

/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->vgroups.release ();
  data->vcands.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
  delete data->iv_common_cand_tab;
  data->iv_common_cand_tab = NULL;
  data->iv_common_cands.release ();
  obstack_free (&data->iv_obstack, NULL);
}

/* Returns true if the loop body BODY, consisting of NUM_NODES blocks,
   includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !gimple_call_internal_p (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }

  return false;
}

/* Determine cost scaling factor for basic blocks in loop.  */
#define COST_SCALING_FACTOR_BOUND (20)
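
/* Scale per-block costs by the ratio of a block's profile frequency to the
   loop header's frequency, capped by COST_SCALING_FACTOR_BOUND.  As a
   hypothetical example: with a header frequency of 100 and a block frequency
   of 400, statements in that block are weighted 4 times heavier; with a
   block frequency of 10000 the weight is clamped to 20.  */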
static void
determine_scaling_factor (struct ivopts_data *data, basic_block *body)
{
  int lfreq = data->current_loop->header->count.to_frequency (cfun);
  if (!data->speed || lfreq <= 0)
    return;

  int max_freq = lfreq;
  for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
    {
      body[i]->aux = (void *)(intptr_t) 1;
      if (max_freq < body[i]->count.to_frequency (cfun))
	max_freq = body[i]->count.to_frequency (cfun);
    }
  if (max_freq > lfreq)
    {
      int divisor, factor;

      /* Check if scaling factor itself needs to be scaled by the bound.  This
	 is to avoid overflow when scaling cost according to profile info.  */
      if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
	{
	  divisor = max_freq;
	  factor = COST_SCALING_FACTOR_BOUND;
	}
      else
	{
	  divisor = lfreq;
	  factor = 1;
	}
      for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
	{
	  int bfreq = body[i]->count.to_frequency (cfun);
	  if (bfreq <= lfreq)
	    continue;

	  body[i]->aux = (void *)(intptr_t) (factor * bfreq / divisor);
	}
    }
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop,
			   bitmap toremove)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->loop_loc = find_loop_location (loop).get_location_t ();
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d", loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, "\n");

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
    goto finish;

  /* Determine cost scaling factor for basic blocks in loop.  */
  determine_scaling_factor (data, body);

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_group_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  /* Cleanup basic block aux field.  */
  for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
    body[i]->aux = NULL;
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_groups (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data, toremove);

finish:
  free (body);
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  auto_bitmap toremove;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop, toremove);
    }

  /* Remove eliminated IV defs.  */
  release_defs_bitset (toremove);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset_htab ();
  /* Likewise niter and control-IV information.  */
  free_numbers_of_iterations_estimates (cfun);

  tree_ssa_iv_optimize_finalize (&data);
}

#include "gt-tree-ssa-loop-ivopts.h"